[llvm-branch-commits] [clang] release/19.x: [Clang] Correctly forward `--cuda-path` to the nvlink wrapper (#100170) (PR #100216)
https://github.com/tru updated https://github.com/llvm/llvm-project/pull/100216 >From 1a5c4e5c4fded8293985dc1875a971f7783cfc45 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Tue, 23 Jul 2024 14:41:57 -0500 Subject: [PATCH] [Clang] Correctly forward `--cuda-path` to the nvlink wrapper (#100170) Summary: This was not forwarded properly as it would try to pass it to `nvlink`. Fixes https://github.com/llvm/llvm-project/issues/100168 (cherry picked from commit 7e1fcf5dd657d465c3fc846f56c6f9d3a4560b43) --- clang/lib/Driver/ToolChains/Cuda.cpp | 4 clang/test/Driver/linker-wrapper-passes.c | 1 - clang/test/Driver/nvlink-wrapper.c | 7 +++ clang/tools/clang-nvlink-wrapper/NVLinkOpts.td | 4 ++-- 4 files changed, 13 insertions(+), 3 deletions(-) diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp index 59453c484ae4f4..61d12b10dfb62b 100644 --- a/clang/lib/Driver/ToolChains/Cuda.cpp +++ b/clang/lib/Driver/ToolChains/Cuda.cpp @@ -609,6 +609,10 @@ void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back(Args.MakeArgString( "--pxtas-path=" + Args.getLastArgValue(options::OPT_ptxas_path_EQ))); + if (Args.hasArg(options::OPT_cuda_path_EQ)) +CmdArgs.push_back(Args.MakeArgString( +"--cuda-path=" + Args.getLastArgValue(options::OPT_cuda_path_EQ))); + // Add paths specified in LIBRARY_PATH environment variable as -L options. addDirectoryList(Args, CmdArgs, "-L", "LIBRARY_PATH"); diff --git a/clang/test/Driver/linker-wrapper-passes.c b/clang/test/Driver/linker-wrapper-passes.c index aadcf472e9b636..b257c942afa075 100644 --- a/clang/test/Driver/linker-wrapper-passes.c +++ b/clang/test/Driver/linker-wrapper-passes.c @@ -3,7 +3,6 @@ // REQUIRES: llvm-plugins, llvm-examples // REQUIRES: x86-registered-target // REQUIRES: amdgpu-registered-target - // Setup. 
// RUN: mkdir -p %t // RUN: %clang -cc1 -emit-llvm-bc -o %t/host-x86_64-unknown-linux-gnu.bc \ diff --git a/clang/test/Driver/nvlink-wrapper.c b/clang/test/Driver/nvlink-wrapper.c index fdda93f1f9cdc1..318315ddaca340 100644 --- a/clang/test/Driver/nvlink-wrapper.c +++ b/clang/test/Driver/nvlink-wrapper.c @@ -63,3 +63,10 @@ int baz() { return y + x; } // RUN: -arch sm_52 -o a.out 2>&1 | FileCheck %s --check-prefix=LTO // LTO: ptxas{{.*}} -m64 -c [[PTX:.+]].s -O3 -arch sm_52 -o [[CUBIN:.+]].cubin // LTO: nvlink{{.*}} -arch sm_52 -o a.out [[CUBIN]].cubin {{.*}}-u-{{.*}}.cubin {{.*}}-y-{{.*}}.cubin + +// +// Check that we don't forward some arguments. +// +// RUN: clang-nvlink-wrapper --dry-run %t.o %t-u.o %t-y.a \ +// RUN: -arch sm_52 --cuda-path/opt/cuda -o a.out 2>&1 | FileCheck %s --check-prefix=PATH +// PATH-NOT: --cuda-path=/opt/cuda diff --git a/clang/tools/clang-nvlink-wrapper/NVLinkOpts.td b/clang/tools/clang-nvlink-wrapper/NVLinkOpts.td index e84b530f2787d3..8c80a51b12a44e 100644 --- a/clang/tools/clang-nvlink-wrapper/NVLinkOpts.td +++ b/clang/tools/clang-nvlink-wrapper/NVLinkOpts.td @@ -12,9 +12,9 @@ def verbose : Flag<["-"], "v">, HelpText<"Print verbose information">; def version : Flag<["--"], "version">, HelpText<"Display the version number and exit">; -def cuda_path_EQ : Joined<["--"], "cuda-path=">, +def cuda_path_EQ : Joined<["--"], "cuda-path=">, Flags<[WrapperOnlyOption]>, MetaVarName<"">, HelpText<"Set the system CUDA path">; -def ptxas_path_EQ : Joined<["--"], "ptxas-path=">, +def ptxas_path_EQ : Joined<["--"], "ptxas-path=">, Flags<[WrapperOnlyOption]>, MetaVarName<"">, HelpText<"Set the 'ptxas' path">; def o : JoinedOrSeparate<["-"], "o">, MetaVarName<"">, ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] 1a5c4e5 - [Clang] Correctly forward `--cuda-path` to the nvlink wrapper (#100170)
Author: Joseph Huber Date: 2024-08-19T09:02:52+02:00 New Revision: 1a5c4e5c4fded8293985dc1875a971f7783cfc45 URL: https://github.com/llvm/llvm-project/commit/1a5c4e5c4fded8293985dc1875a971f7783cfc45 DIFF: https://github.com/llvm/llvm-project/commit/1a5c4e5c4fded8293985dc1875a971f7783cfc45.diff LOG: [Clang] Correctly forward `--cuda-path` to the nvlink wrapper (#100170) Summary: This was not forwarded properly as it would try to pass it to `nvlink`. Fixes https://github.com/llvm/llvm-project/issues/100168 (cherry picked from commit 7e1fcf5dd657d465c3fc846f56c6f9d3a4560b43) Added: Modified: clang/lib/Driver/ToolChains/Cuda.cpp clang/test/Driver/linker-wrapper-passes.c clang/test/Driver/nvlink-wrapper.c clang/tools/clang-nvlink-wrapper/NVLinkOpts.td Removed: diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp index 59453c484ae4f4..61d12b10dfb62b 100644 --- a/clang/lib/Driver/ToolChains/Cuda.cpp +++ b/clang/lib/Driver/ToolChains/Cuda.cpp @@ -609,6 +609,10 @@ void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back(Args.MakeArgString( "--pxtas-path=" + Args.getLastArgValue(options::OPT_ptxas_path_EQ))); + if (Args.hasArg(options::OPT_cuda_path_EQ)) +CmdArgs.push_back(Args.MakeArgString( +"--cuda-path=" + Args.getLastArgValue(options::OPT_cuda_path_EQ))); + // Add paths specified in LIBRARY_PATH environment variable as -L options. addDirectoryList(Args, CmdArgs, "-L", "LIBRARY_PATH"); diff --git a/clang/test/Driver/linker-wrapper-passes.c b/clang/test/Driver/linker-wrapper-passes.c index aadcf472e9b636..b257c942afa075 100644 --- a/clang/test/Driver/linker-wrapper-passes.c +++ b/clang/test/Driver/linker-wrapper-passes.c @@ -3,7 +3,6 @@ // REQUIRES: llvm-plugins, llvm-examples // REQUIRES: x86-registered-target // REQUIRES: amdgpu-registered-target - // Setup. 
// RUN: mkdir -p %t // RUN: %clang -cc1 -emit-llvm-bc -o %t/host-x86_64-unknown-linux-gnu.bc \ diff --git a/clang/test/Driver/nvlink-wrapper.c b/clang/test/Driver/nvlink-wrapper.c index fdda93f1f9cdc1..318315ddaca340 100644 --- a/clang/test/Driver/nvlink-wrapper.c +++ b/clang/test/Driver/nvlink-wrapper.c @@ -63,3 +63,10 @@ int baz() { return y + x; } // RUN: -arch sm_52 -o a.out 2>&1 | FileCheck %s --check-prefix=LTO // LTO: ptxas{{.*}} -m64 -c [[PTX:.+]].s -O3 -arch sm_52 -o [[CUBIN:.+]].cubin // LTO: nvlink{{.*}} -arch sm_52 -o a.out [[CUBIN]].cubin {{.*}}-u-{{.*}}.cubin {{.*}}-y-{{.*}}.cubin + +// +// Check that we don't forward some arguments. +// +// RUN: clang-nvlink-wrapper --dry-run %t.o %t-u.o %t-y.a \ +// RUN: -arch sm_52 --cuda-path/opt/cuda -o a.out 2>&1 | FileCheck %s --check-prefix=PATH +// PATH-NOT: --cuda-path=/opt/cuda diff --git a/clang/tools/clang-nvlink-wrapper/NVLinkOpts.td b/clang/tools/clang-nvlink-wrapper/NVLinkOpts.td index e84b530f2787d3..8c80a51b12a44e 100644 --- a/clang/tools/clang-nvlink-wrapper/NVLinkOpts.td +++ b/clang/tools/clang-nvlink-wrapper/NVLinkOpts.td @@ -12,9 +12,9 @@ def verbose : Flag<["-"], "v">, HelpText<"Print verbose information">; def version : Flag<["--"], "version">, HelpText<"Display the version number and exit">; -def cuda_path_EQ : Joined<["--"], "cuda-path=">, +def cuda_path_EQ : Joined<["--"], "cuda-path=">, Flags<[WrapperOnlyOption]>, MetaVarName<"">, HelpText<"Set the system CUDA path">; -def ptxas_path_EQ : Joined<["--"], "ptxas-path=">, +def ptxas_path_EQ : Joined<["--"], "ptxas-path=">, Flags<[WrapperOnlyOption]>, MetaVarName<"">, HelpText<"Set the 'ptxas' path">; def o : JoinedOrSeparate<["-"], "o">, MetaVarName<"">, ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/19.x: [Clang] Correctly forward `--cuda-path` to the nvlink wrapper (#100170) (PR #100216)
https://github.com/tru closed https://github.com/llvm/llvm-project/pull/100216 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/19.x: [RISCV] Use experimental.vp.splat to splat specific vector length elements. (#101329) (PR #101506)
https://github.com/tru updated https://github.com/llvm/llvm-project/pull/101506 >From c45fc691a2a39318fc146ba1665a2fe2d9f43b2b Mon Sep 17 00:00:00 2001 From: Yeting Kuo <46629943+yeti...@users.noreply.github.com> Date: Thu, 1 Aug 2024 09:37:42 +0800 Subject: [PATCH] [RISCV] Use experimental.vp.splat to splat specific vector length elements. (#101329) Previously, llvm IR is hard to create a scalable vector splat with a specific vector length, so we use riscv.vmv.v.x and riscv.vmv.v.f to do this work. But the two rvv intrinsics needs strict type constraint which can not support fixed vector types and illegal vector types. Using vp.splat could preserve old functionality and also generate more optimized code for vector types and illegal vectors. This patch also fixes crash for getEVT not serving ptr types. (cherry picked from commit 87af9ee870ad7ca93abced0b09459c3760dec891) --- llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp | 19 + .../RISCV/rvv/fixed-vectors-strided-vpload.ll | 8 +- llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll | 80 +-- 3 files changed, 79 insertions(+), 28 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp b/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp index 0a66a38f6d5abc..be2e880ecd3a98 100644 --- a/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp +++ b/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp @@ -187,25 +187,10 @@ bool RISCVCodeGenPrepare::expandVPStrideLoad(IntrinsicInst &II) { auto *VTy = cast(II.getType()); IRBuilder<> Builder(&II); - - // Extend VL from i32 to XLen if needed. - if (ST->is64Bit()) -VL = Builder.CreateZExt(VL, Builder.getInt64Ty()); - Type *STy = VTy->getElementType(); Value *Val = Builder.CreateLoad(STy, BasePtr); - const auto &TLI = *ST->getTargetLowering(); - Value *Res; - - // TODO: Also support fixed/illegal vector types to splat with evl = vl. - if (isa(VTy) && TLI.isTypeLegal(EVT::getEVT(VTy))) { -unsigned VMVOp = STy->isFloatingPointTy() ? 
Intrinsic::riscv_vfmv_v_f - : Intrinsic::riscv_vmv_v_x; -Res = Builder.CreateIntrinsic(VMVOp, {VTy, VL->getType()}, - {PoisonValue::get(VTy), Val, VL}); - } else { -Res = Builder.CreateVectorSplat(VTy->getElementCount(), Val); - } + Value *Res = Builder.CreateIntrinsic(Intrinsic::experimental_vp_splat, {VTy}, + {Val, II.getOperand(2), VL}); II.replaceAllUsesWith(Res); II.eraseFromParent(); diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll index b8c7037580c46b..849f98c26f4593 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll @@ -638,14 +638,14 @@ declare <33 x double> @llvm.experimental.vp.strided.load.v33f64.p0.i64(ptr, i64, define <4 x i8> @zero_strided_unmasked_vpload_4i8_i8(ptr %ptr) { ; CHECK-OPT-LABEL: zero_strided_unmasked_vpload_4i8_i8: ; CHECK-OPT: # %bb.0: -; CHECK-OPT-NEXT:vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-OPT-NEXT:vsetivli zero, 3, e8, mf4, ta, ma ; CHECK-OPT-NEXT:vlse8.v v8, (a0), zero ; CHECK-OPT-NEXT:ret ; ; CHECK-NO-OPT-LABEL: zero_strided_unmasked_vpload_4i8_i8: ; CHECK-NO-OPT: # %bb.0: ; CHECK-NO-OPT-NEXT:lbu a0, 0(a0) -; CHECK-NO-OPT-NEXT:vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NO-OPT-NEXT:vsetivli zero, 3, e8, mf4, ta, ma ; CHECK-NO-OPT-NEXT:vmv.v.x v8, a0 ; CHECK-NO-OPT-NEXT:ret %load = call <4 x i8> @llvm.experimental.vp.strided.load.4i8.p0.i8(ptr %ptr, i8 0, <4 x i1> splat (i1 true), i32 3) @@ -657,14 +657,14 @@ define <4 x i8> @zero_strided_unmasked_vpload_4i8_i8(ptr %ptr) { define <4 x half> @zero_strided_unmasked_vpload_4f16(ptr %ptr) { ; CHECK-OPT-LABEL: zero_strided_unmasked_vpload_4f16: ; CHECK-OPT: # %bb.0: -; CHECK-OPT-NEXT:vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-OPT-NEXT:vsetivli zero, 3, e16, mf2, ta, ma ; CHECK-OPT-NEXT:vlse16.v v8, (a0), zero ; CHECK-OPT-NEXT:ret ; ; CHECK-NO-OPT-LABEL: zero_strided_unmasked_vpload_4f16: ; CHECK-NO-OPT: # %bb.0: 
; CHECK-NO-OPT-NEXT:flh fa5, 0(a0) -; CHECK-NO-OPT-NEXT:vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NO-OPT-NEXT:vsetivli zero, 3, e16, mf2, ta, ma ; CHECK-NO-OPT-NEXT:vfmv.v.f v8, fa5 ; CHECK-NO-OPT-NEXT:ret %load = call <4 x half> @llvm.experimental.vp.strided.load.4f16.p0.i32(ptr %ptr, i32 0, <4 x i1> splat (i1 true), i32 3) diff --git a/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll index 0010f64a93fd62..14976f21b7dbba 100644 --- a/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll +++ b/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll @@ -1,16 +1,16 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+v,+z
[llvm-branch-commits] [llvm] c45fc69 - [RISCV] Use experimental.vp.splat to splat specific vector length elements. (#101329)
Author: Yeting Kuo Date: 2024-08-19T09:04:45+02:00 New Revision: c45fc691a2a39318fc146ba1665a2fe2d9f43b2b URL: https://github.com/llvm/llvm-project/commit/c45fc691a2a39318fc146ba1665a2fe2d9f43b2b DIFF: https://github.com/llvm/llvm-project/commit/c45fc691a2a39318fc146ba1665a2fe2d9f43b2b.diff LOG: [RISCV] Use experimental.vp.splat to splat specific vector length elements. (#101329) Previously, llvm IR is hard to create a scalable vector splat with a specific vector length, so we use riscv.vmv.v.x and riscv.vmv.v.f to do this work. But the two rvv intrinsics needs strict type constraint which can not support fixed vector types and illegal vector types. Using vp.splat could preserve old functionality and also generate more optimized code for vector types and illegal vectors. This patch also fixes crash for getEVT not serving ptr types. (cherry picked from commit 87af9ee870ad7ca93abced0b09459c3760dec891) Added: Modified: llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll Removed: diff --git a/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp b/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp index 0a66a38f6d5abc..be2e880ecd3a98 100644 --- a/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp +++ b/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp @@ -187,25 +187,10 @@ bool RISCVCodeGenPrepare::expandVPStrideLoad(IntrinsicInst &II) { auto *VTy = cast(II.getType()); IRBuilder<> Builder(&II); - - // Extend VL from i32 to XLen if needed. - if (ST->is64Bit()) -VL = Builder.CreateZExt(VL, Builder.getInt64Ty()); - Type *STy = VTy->getElementType(); Value *Val = Builder.CreateLoad(STy, BasePtr); - const auto &TLI = *ST->getTargetLowering(); - Value *Res; - - // TODO: Also support fixed/illegal vector types to splat with evl = vl. - if (isa(VTy) && TLI.isTypeLegal(EVT::getEVT(VTy))) { -unsigned VMVOp = STy->isFloatingPointTy() ? 
Intrinsic::riscv_vfmv_v_f - : Intrinsic::riscv_vmv_v_x; -Res = Builder.CreateIntrinsic(VMVOp, {VTy, VL->getType()}, - {PoisonValue::get(VTy), Val, VL}); - } else { -Res = Builder.CreateVectorSplat(VTy->getElementCount(), Val); - } + Value *Res = Builder.CreateIntrinsic(Intrinsic::experimental_vp_splat, {VTy}, + {Val, II.getOperand(2), VL}); II.replaceAllUsesWith(Res); II.eraseFromParent(); diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll index b8c7037580c46b..849f98c26f4593 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll @@ -638,14 +638,14 @@ declare <33 x double> @llvm.experimental.vp.strided.load.v33f64.p0.i64(ptr, i64, define <4 x i8> @zero_strided_unmasked_vpload_4i8_i8(ptr %ptr) { ; CHECK-OPT-LABEL: zero_strided_unmasked_vpload_4i8_i8: ; CHECK-OPT: # %bb.0: -; CHECK-OPT-NEXT:vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-OPT-NEXT:vsetivli zero, 3, e8, mf4, ta, ma ; CHECK-OPT-NEXT:vlse8.v v8, (a0), zero ; CHECK-OPT-NEXT:ret ; ; CHECK-NO-OPT-LABEL: zero_strided_unmasked_vpload_4i8_i8: ; CHECK-NO-OPT: # %bb.0: ; CHECK-NO-OPT-NEXT:lbu a0, 0(a0) -; CHECK-NO-OPT-NEXT:vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NO-OPT-NEXT:vsetivli zero, 3, e8, mf4, ta, ma ; CHECK-NO-OPT-NEXT:vmv.v.x v8, a0 ; CHECK-NO-OPT-NEXT:ret %load = call <4 x i8> @llvm.experimental.vp.strided.load.4i8.p0.i8(ptr %ptr, i8 0, <4 x i1> splat (i1 true), i32 3) @@ -657,14 +657,14 @@ define <4 x i8> @zero_strided_unmasked_vpload_4i8_i8(ptr %ptr) { define <4 x half> @zero_strided_unmasked_vpload_4f16(ptr %ptr) { ; CHECK-OPT-LABEL: zero_strided_unmasked_vpload_4f16: ; CHECK-OPT: # %bb.0: -; CHECK-OPT-NEXT:vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-OPT-NEXT:vsetivli zero, 3, e16, mf2, ta, ma ; CHECK-OPT-NEXT:vlse16.v v8, (a0), zero ; CHECK-OPT-NEXT:ret ; ; CHECK-NO-OPT-LABEL: zero_strided_unmasked_vpload_4f16: ; CHECK-NO-OPT: # %bb.0: 
; CHECK-NO-OPT-NEXT:flh fa5, 0(a0) -; CHECK-NO-OPT-NEXT:vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NO-OPT-NEXT:vsetivli zero, 3, e16, mf2, ta, ma ; CHECK-NO-OPT-NEXT:vfmv.v.f v8, fa5 ; CHECK-NO-OPT-NEXT:ret %load = call <4 x half> @llvm.experimental.vp.strided.load.4f16.p0.i32(ptr %ptr, i32 0, <4 x i1> splat (i1 true), i32 3) diff --git a/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll index 0010f64a93fd62..14976f21b7dbba 100644 --- a/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll +++ b/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll @@ -1,16 +1,16 @@ ; NOTE: Assertions have been autogenerated by
[llvm-branch-commits] [llvm] release/19.x: [RISCV] Use experimental.vp.splat to splat specific vector length elements. (#101329) (PR #101506)
https://github.com/tru closed https://github.com/llvm/llvm-project/pull/101506 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/19.x: [Clang] Correctly forward `--cuda-path` to the nvlink wrapper (#100170) (PR #100216)
github-actions[bot] wrote: @jhuber6 (or anyone else): if you would like to add a note about this fix to the release notes (completely optional), please reply to this comment with a one- or two-sentence description of the fix. When you are done, please add the release:note label to this PR. https://github.com/llvm/llvm-project/pull/100216 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/19.x: [RISCV] Use experimental.vp.splat to splat specific vector length elements. (#101329) (PR #101506)
github-actions[bot] wrote: @topperc (or anyone else): if you would like to add a note about this fix to the release notes (completely optional), please reply to this comment with a one- or two-sentence description of the fix. When you are done, please add the release:note label to this PR. https://github.com/llvm/llvm-project/pull/101506 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/19.x: [Hexagon] Do not optimize address of another function's block (#101209) (PR #102179)
https://github.com/tru updated https://github.com/llvm/llvm-project/pull/102179 >From 2ab8d93061581edad3501561722ebd5632d73892 Mon Sep 17 00:00:00 2001 From: yandalur Date: Thu, 1 Aug 2024 21:37:23 +0530 Subject: [PATCH] [Hexagon] Do not optimize address of another function's block (#101209) When the constant extender optimization pass encounters an instruction that uses an extended address pointing to another function's block, avoid adding the instruction to the extender list for the current machine function. Fixes https://github.com/llvm/llvm-project/issues/99714 (cherry picked from commit 68df06a0b2998765cb0a41353fcf0919bbf57ddb) --- .../Target/Hexagon/HexagonConstExtenders.cpp | 4 + .../CodeGen/Hexagon/cext-opt-block-addr.mir | 173 ++ 2 files changed, 177 insertions(+) create mode 100644 llvm/test/CodeGen/Hexagon/cext-opt-block-addr.mir diff --git a/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp b/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp index f0933765bbcbda..86ce6b4e05ed27 100644 --- a/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp +++ b/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp @@ -1223,6 +1223,10 @@ void HCE::recordExtender(MachineInstr &MI, unsigned OpNum) { if (ER.Kind == MachineOperand::MO_GlobalAddress) if (ER.V.GV->getName().empty()) return; + // Ignore block address that points to block in another function + if (ER.Kind == MachineOperand::MO_BlockAddress) +if (ER.V.BA->getFunction() != &(MI.getMF()->getFunction())) + return; Extenders.push_back(ED); } diff --git a/llvm/test/CodeGen/Hexagon/cext-opt-block-addr.mir b/llvm/test/CodeGen/Hexagon/cext-opt-block-addr.mir new file mode 100644 index 00..9f140132dcd6c3 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/cext-opt-block-addr.mir @@ -0,0 +1,173 @@ +# REQUIRES: asserts +# RUN: llc -march=hexagon -run-pass hexagon-cext-opt %s -o - | FileCheck %s + +# Check that the HexagonConstantExtenders pass does not assert when block +# addresses from different functions are used +# CHECK-LABEL: 
name: wibble +# CHECK: A2_tfrsi blockaddress(@baz +# CHECK: A2_tfrsi blockaddress(@wibble + +--- | + target triple = "hexagon" + + define dso_local void @baz() { + bb: +br label %bb1 + + bb1: ; preds = %bb +%call = tail call fastcc i32 @wibble(i32 poison) +ret void + } + + define internal fastcc i32 @wibble(i32 %arg) { + bb: +%call = tail call i32 @eggs(i32 noundef ptrtoint (ptr blockaddress(@baz, %bb1) to i32)) +br label %bb1 + + bb1: ; preds = %bb +tail call void @baz.1(i32 noundef ptrtoint (ptr blockaddress(@wibble, %bb1) to i32)) +ret i32 %call + } + + declare i32 @eggs(i32 noundef) local_unnamed_addr + + declare void @baz.1(i32 noundef) local_unnamed_addr + +... +--- +name:baz +alignment: 16 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected:false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +callsEHReturn: false +callsUnwindInit: false +hasEHCatchret: false +hasEHScopes: false +hasEHFunclets: false +isOutlined: false +debugInstrRef: false +failsVerification: false +tracksDebugUserValues: false +registers: + - { id: 0, class: intregs, preferred-register: '' } +liveins: [] +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment:1 + adjustsStack:false + hasCalls:false + stackProtector: '' + functionContext: '' + maxCallFrameSize: 4294967295 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + hasTailCall: true + isCalleeSavedInfoValid: false + localFrameSize: 0 + savePoint: '' + restorePoint:'' +fixedStack: [] +stack: [] +entry_values:[] +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: {} +body: | + bb.0.bb: +successors: %bb.1(0x8000) + + bb.1.bb1 (ir-block-address-taken %ir-block.bb1): +%0:intregs = IMPLICIT_DEF +$r0 = COPY %0 +PS_tailcall_i @wibble, hexagoncsr, implicit $r0 + +... 
+--- +name:wibble +alignment: 16 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected:false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +callsEHReturn: false +callsUnwindInit: false +hasEHCatchret: false +hasEHScopes: false +hasEHFunclets: false +isOutlined: false +debugInstrRef: false +failsVerification: false +tracksDebugUserValues: false +registers: + - { id: 0, class: intregs, preferred-register: '' } + - { id: 1, class: intregs, preferred-register: '' } + - { id: 2, class: intregs, preferred-reg
[llvm-branch-commits] [llvm] 2ab8d93 - [Hexagon] Do not optimize address of another function's block (#101209)
Author: yandalur Date: 2024-08-19T09:06:57+02:00 New Revision: 2ab8d93061581edad3501561722ebd5632d73892 URL: https://github.com/llvm/llvm-project/commit/2ab8d93061581edad3501561722ebd5632d73892 DIFF: https://github.com/llvm/llvm-project/commit/2ab8d93061581edad3501561722ebd5632d73892.diff LOG: [Hexagon] Do not optimize address of another function's block (#101209) When the constant extender optimization pass encounters an instruction that uses an extended address pointing to another function's block, avoid adding the instruction to the extender list for the current machine function. Fixes https://github.com/llvm/llvm-project/issues/99714 (cherry picked from commit 68df06a0b2998765cb0a41353fcf0919bbf57ddb) Added: llvm/test/CodeGen/Hexagon/cext-opt-block-addr.mir Modified: llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp Removed: diff --git a/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp b/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp index f0933765bbcbda..86ce6b4e05ed27 100644 --- a/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp +++ b/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp @@ -1223,6 +1223,10 @@ void HCE::recordExtender(MachineInstr &MI, unsigned OpNum) { if (ER.Kind == MachineOperand::MO_GlobalAddress) if (ER.V.GV->getName().empty()) return; + // Ignore block address that points to block in another function + if (ER.Kind == MachineOperand::MO_BlockAddress) +if (ER.V.BA->getFunction() != &(MI.getMF()->getFunction())) + return; Extenders.push_back(ED); } diff --git a/llvm/test/CodeGen/Hexagon/cext-opt-block-addr.mir b/llvm/test/CodeGen/Hexagon/cext-opt-block-addr.mir new file mode 100644 index 00..9f140132dcd6c3 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/cext-opt-block-addr.mir @@ -0,0 +1,173 @@ +# REQUIRES: asserts +# RUN: llc -march=hexagon -run-pass hexagon-cext-opt %s -o - | FileCheck %s + +# Check that the HexagonConstantExtenders pass does not assert when block +# addresses from different functions are used +# CHECK-LABEL: name: 
wibble +# CHECK: A2_tfrsi blockaddress(@baz +# CHECK: A2_tfrsi blockaddress(@wibble + +--- | + target triple = "hexagon" + + define dso_local void @baz() { + bb: +br label %bb1 + + bb1: ; preds = %bb +%call = tail call fastcc i32 @wibble(i32 poison) +ret void + } + + define internal fastcc i32 @wibble(i32 %arg) { + bb: +%call = tail call i32 @eggs(i32 noundef ptrtoint (ptr blockaddress(@baz, %bb1) to i32)) +br label %bb1 + + bb1: ; preds = %bb +tail call void @baz.1(i32 noundef ptrtoint (ptr blockaddress(@wibble, %bb1) to i32)) +ret i32 %call + } + + declare i32 @eggs(i32 noundef) local_unnamed_addr + + declare void @baz.1(i32 noundef) local_unnamed_addr + +... +--- +name:baz +alignment: 16 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected:false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +callsEHReturn: false +callsUnwindInit: false +hasEHCatchret: false +hasEHScopes: false +hasEHFunclets: false +isOutlined: false +debugInstrRef: false +failsVerification: false +tracksDebugUserValues: false +registers: + - { id: 0, class: intregs, preferred-register: '' } +liveins: [] +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment:1 + adjustsStack:false + hasCalls:false + stackProtector: '' + functionContext: '' + maxCallFrameSize: 4294967295 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + hasTailCall: true + isCalleeSavedInfoValid: false + localFrameSize: 0 + savePoint: '' + restorePoint:'' +fixedStack: [] +stack: [] +entry_values:[] +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: {} +body: | + bb.0.bb: +successors: %bb.1(0x8000) + + bb.1.bb1 (ir-block-address-taken %ir-block.bb1): +%0:intregs = IMPLICIT_DEF +$r0 = COPY %0 +PS_tailcall_i @wibble, hexagoncsr, implicit $r0 + +... 
+--- +name:wibble +alignment: 16 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected:false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +callsEHReturn: false +callsUnwindInit: false +hasEHCatchret: false +hasEHScopes: false +hasEHFunclets: false +isOutlined: false +debugInstrRef: false +failsVerification: false +tracksDebugUserValues: false +registers: + - { id: 0, class: intregs, preferred-register: '' } + - { id: 1, class: intregs
[llvm-branch-commits] [llvm] release/19.x: [Hexagon] Do not optimize address of another function's block (#101209) (PR #102179)
https://github.com/tru closed https://github.com/llvm/llvm-project/pull/102179 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/19.x: [Hexagon] Do not optimize address of another function's block (#101209) (PR #102179)
github-actions[bot] wrote: @yandalur (or anyone else): if you would like to add a note about this fix to the release notes (completely optional), please reply to this comment with a one- or two-sentence description of the fix. When you are done, please add the release:note label to this PR. https://github.com/llvm/llvm-project/pull/102179 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [AArch64] Add GCS release notes (PR #103866)
https://github.com/tru updated https://github.com/llvm/llvm-project/pull/103866 >From 1cfd6754454ba62fd0ad306c09a7b6d526f835dc Mon Sep 17 00:00:00 2001 From: John Brawn Date: Wed, 14 Aug 2024 12:22:51 +0100 Subject: [PATCH] [AArch64] Add GCS release notes --- clang/docs/ReleaseNotes.rst | 5 + 1 file changed, 5 insertions(+) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 5cd398c22c946d..b56e7177846d99 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -1207,6 +1207,11 @@ Arm and AArch64 Support * Arm Neoverse-N3 (neoverse-n3). * Arm Neoverse-V3 (neoverse-v3). * Arm Neoverse-V3AE (neoverse-v3ae). +- ``-mbranch-protection=gcs`` has been added which enables support for the + Guarded Control Stack extension, and ``-mbranch-protection=standard`` also + enables this. Enabling GCS causes the GCS GNU property bit to be set on output + objects. It doesn't cause any code generation changes, as the code generated + by clang is already compatible with GCS. Android Support ^^^ ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] 1cfd675 - [AArch64] Add GCS release notes
Author: John Brawn Date: 2024-08-19T09:10:20+02:00 New Revision: 1cfd6754454ba62fd0ad306c09a7b6d526f835dc URL: https://github.com/llvm/llvm-project/commit/1cfd6754454ba62fd0ad306c09a7b6d526f835dc DIFF: https://github.com/llvm/llvm-project/commit/1cfd6754454ba62fd0ad306c09a7b6d526f835dc.diff LOG: [AArch64] Add GCS release notes Added: Modified: clang/docs/ReleaseNotes.rst Removed: diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 5cd398c22c946d..b56e7177846d99 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -1207,6 +1207,11 @@ Arm and AArch64 Support * Arm Neoverse-N3 (neoverse-n3). * Arm Neoverse-V3 (neoverse-v3). * Arm Neoverse-V3AE (neoverse-v3ae). +- ``-mbranch-protection=gcs`` has been added which enables support for the + Guarded Control Stack extension, and ``-mbranch-protection=standard`` also + enables this. Enabling GCS causes the GCS GNU property bit to be set on output + objects. It doesn't cause any code generation changes, as the code generated + by clang is already compatible with GCS. Android Support ^^^ ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [AArch64] Add GCS release notes (PR #103866)
https://github.com/tru closed https://github.com/llvm/llvm-project/pull/103866 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/19.x: Revert "[CGData] llvm-cgdata (#89884)" (PR #103886)
https://github.com/tru updated https://github.com/llvm/llvm-project/pull/103886 >From 9e90c40564e21dc5f1a12e08cfdf29305aaf9f50 Mon Sep 17 00:00:00 2001 From: Gulfem Savrun Yeniceri Date: Tue, 23 Jul 2024 11:06:30 + Subject: [PATCH] Revert "[CGData] llvm-cgdata (#89884)" This reverts commit d3fb41dddc11b0ebc338a3b9e6a5ab7288ff7d1d and forward fix patches because of the issue explained in: https://github.com/llvm/llvm-project/pull/89884#issuecomment-2244348117. Revert "Fix tests for https://github.com/llvm/llvm-project/pull/89884 (#100061)" This reverts commit 67937a3f969aaf97a745a45281a0d22273bff713. Revert "Fix build break for https://github.com/llvm/llvm-project/pull/89884 (#100050)" This reverts commit c33878c5787c128234d533ad19d672dc3eea19a8. Revert "[CGData] Fix -Wpessimizing-move in CodeGenDataReader.cpp (NFC)" This reverts commit 1f8b2b146141f3563085a1acb77deb50857a636d. (cherry picked from commit 73d78973fe072438f0f73088f889c66845b2b51a) --- llvm/include/llvm/CodeGenData/CodeGenData.h | 204 - llvm/include/llvm/CodeGenData/CodeGenData.inc | 46 --- .../llvm/CodeGenData/CodeGenDataReader.h | 154 -- .../llvm/CodeGenData/CodeGenDataWriter.h | 68 - llvm/lib/CodeGenData/CMakeLists.txt | 3 - llvm/lib/CodeGenData/CodeGenData.cpp | 196 - llvm/lib/CodeGenData/CodeGenDataReader.cpp| 175 llvm/lib/CodeGenData/CodeGenDataWriter.cpp| 162 --- llvm/test/CMakeLists.txt | 1 - llvm/test/lit.cfg.py | 1 - llvm/test/tools/llvm-cgdata/dump.test | 32 --- llvm/test/tools/llvm-cgdata/empty.test| 35 --- llvm/test/tools/llvm-cgdata/error.test| 38 --- .../test/tools/llvm-cgdata/merge-archive.test | 90 -- llvm/test/tools/llvm-cgdata/merge-concat.test | 83 -- llvm/test/tools/llvm-cgdata/merge-double.test | 87 -- llvm/test/tools/llvm-cgdata/merge-single.test | 49 llvm/test/tools/llvm-cgdata/show.test | 30 -- llvm/tools/llvm-cgdata/CMakeLists.txt | 15 - llvm/tools/llvm-cgdata/llvm-cgdata.cpp| 268 -- 20 files changed, 1737 deletions(-) delete mode 100644 
llvm/include/llvm/CodeGenData/CodeGenData.h delete mode 100644 llvm/include/llvm/CodeGenData/CodeGenData.inc delete mode 100644 llvm/include/llvm/CodeGenData/CodeGenDataReader.h delete mode 100644 llvm/include/llvm/CodeGenData/CodeGenDataWriter.h delete mode 100644 llvm/lib/CodeGenData/CodeGenData.cpp delete mode 100644 llvm/lib/CodeGenData/CodeGenDataReader.cpp delete mode 100644 llvm/lib/CodeGenData/CodeGenDataWriter.cpp delete mode 100644 llvm/test/tools/llvm-cgdata/dump.test delete mode 100644 llvm/test/tools/llvm-cgdata/empty.test delete mode 100644 llvm/test/tools/llvm-cgdata/error.test delete mode 100644 llvm/test/tools/llvm-cgdata/merge-archive.test delete mode 100644 llvm/test/tools/llvm-cgdata/merge-concat.test delete mode 100644 llvm/test/tools/llvm-cgdata/merge-double.test delete mode 100644 llvm/test/tools/llvm-cgdata/merge-single.test delete mode 100644 llvm/test/tools/llvm-cgdata/show.test delete mode 100644 llvm/tools/llvm-cgdata/CMakeLists.txt delete mode 100644 llvm/tools/llvm-cgdata/llvm-cgdata.cpp diff --git a/llvm/include/llvm/CodeGenData/CodeGenData.h b/llvm/include/llvm/CodeGenData/CodeGenData.h deleted file mode 100644 index 659008c78abd93..00 --- a/llvm/include/llvm/CodeGenData/CodeGenData.h +++ /dev/null @@ -1,204 +0,0 @@ -//===- CodeGenData.h *- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===--===// -// -// This file contains support for codegen data that has stable summary which -// can be used to optimize the code in the subsequent codegen. 
-// -//===--===// - -#ifndef LLVM_CODEGENDATA_CODEGENDATA_H -#define LLVM_CODEGENDATA_CODEGENDATA_H - -#include "llvm/ADT/BitmaskEnum.h" -#include "llvm/Bitcode/BitcodeReader.h" -#include "llvm/CodeGenData/OutlinedHashTree.h" -#include "llvm/CodeGenData/OutlinedHashTreeRecord.h" -#include "llvm/IR/Module.h" -#include "llvm/Object/ObjectFile.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/TargetParser/Triple.h" -#include - -namespace llvm { - -enum CGDataSectKind { -#define CG_DATA_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix) Kind, -#include "llvm/CodeGenData/CodeGenData.inc" -}; - -std::string getCodeGenDataSectionName(CGDataSectKind CGSK, - Triple::ObjectFormatType OF, - bool AddSegmentInfo = true); - -enum class CGDataKind { - Unknown = 0x0, - // A function ou
[llvm-branch-commits] [llvm] 9e90c40 - Revert "[CGData] llvm-cgdata (#89884)"
Author: Gulfem Savrun Yeniceri Date: 2024-08-19T09:10:52+02:00 New Revision: 9e90c40564e21dc5f1a12e08cfdf29305aaf9f50 URL: https://github.com/llvm/llvm-project/commit/9e90c40564e21dc5f1a12e08cfdf29305aaf9f50 DIFF: https://github.com/llvm/llvm-project/commit/9e90c40564e21dc5f1a12e08cfdf29305aaf9f50.diff LOG: Revert "[CGData] llvm-cgdata (#89884)" This reverts commit d3fb41dddc11b0ebc338a3b9e6a5ab7288ff7d1d and forward fix patches because of the issue explained in: https://github.com/llvm/llvm-project/pull/89884#issuecomment-2244348117. Revert "Fix tests for https://github.com/llvm/llvm-project/pull/89884 (#100061)" This reverts commit 67937a3f969aaf97a745a45281a0d22273bff713. Revert "Fix build break for https://github.com/llvm/llvm-project/pull/89884 (#100050)" This reverts commit c33878c5787c128234d533ad19d672dc3eea19a8. Revert "[CGData] Fix -Wpessimizing-move in CodeGenDataReader.cpp (NFC)" This reverts commit 1f8b2b146141f3563085a1acb77deb50857a636d. (cherry picked from commit 73d78973fe072438f0f73088f889c66845b2b51a) Added: Modified: llvm/lib/CodeGenData/CMakeLists.txt llvm/test/CMakeLists.txt llvm/test/lit.cfg.py Removed: llvm/include/llvm/CodeGenData/CodeGenData.h llvm/include/llvm/CodeGenData/CodeGenData.inc llvm/include/llvm/CodeGenData/CodeGenDataReader.h llvm/include/llvm/CodeGenData/CodeGenDataWriter.h llvm/lib/CodeGenData/CodeGenData.cpp llvm/lib/CodeGenData/CodeGenDataReader.cpp llvm/lib/CodeGenData/CodeGenDataWriter.cpp llvm/test/tools/llvm-cgdata/dump.test llvm/test/tools/llvm-cgdata/empty.test llvm/test/tools/llvm-cgdata/error.test llvm/test/tools/llvm-cgdata/merge-archive.test llvm/test/tools/llvm-cgdata/merge-concat.test llvm/test/tools/llvm-cgdata/merge-double.test llvm/test/tools/llvm-cgdata/merge-single.test llvm/test/tools/llvm-cgdata/show.test llvm/tools/llvm-cgdata/CMakeLists.txt llvm/tools/llvm-cgdata/llvm-cgdata.cpp diff --git a/llvm/include/llvm/CodeGenData/CodeGenData.h b/llvm/include/llvm/CodeGenData/CodeGenData.h deleted file mode 
100644 index 659008c78abd93..00 --- a/llvm/include/llvm/CodeGenData/CodeGenData.h +++ /dev/null @@ -1,204 +0,0 @@ -//===- CodeGenData.h *- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===--===// -// -// This file contains support for codegen data that has stable summary which -// can be used to optimize the code in the subsequent codegen. -// -//===--===// - -#ifndef LLVM_CODEGENDATA_CODEGENDATA_H -#define LLVM_CODEGENDATA_CODEGENDATA_H - -#include "llvm/ADT/BitmaskEnum.h" -#include "llvm/Bitcode/BitcodeReader.h" -#include "llvm/CodeGenData/OutlinedHashTree.h" -#include "llvm/CodeGenData/OutlinedHashTreeRecord.h" -#include "llvm/IR/Module.h" -#include "llvm/Object/ObjectFile.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/TargetParser/Triple.h" -#include - -namespace llvm { - -enum CGDataSectKind { -#define CG_DATA_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix) Kind, -#include "llvm/CodeGenData/CodeGenData.inc" -}; - -std::string getCodeGenDataSectionName(CGDataSectKind CGSK, - Triple::ObjectFormatType OF, - bool AddSegmentInfo = true); - -enum class CGDataKind { - Unknown = 0x0, - // A function outlining info. 
- FunctionOutlinedHashTree = 0x1, - LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/FunctionOutlinedHashTree) -}; - -const std::error_category &cgdata_category(); - -enum class cgdata_error { - success = 0, - eof, - bad_magic, - bad_header, - empty_cgdata, - malformed, - unsupported_version, -}; - -inline std::error_code make_error_code(cgdata_error E) { - return std::error_code(static_cast(E), cgdata_category()); -} - -class CGDataError : public ErrorInfo { -public: - CGDataError(cgdata_error Err, const Twine &ErrStr = Twine()) - : Err(Err), Msg(ErrStr.str()) { -assert(Err != cgdata_error::success && "Not an error"); - } - - std::string message() const override; - - void log(raw_ostream &OS) const override { OS << message(); } - - std::error_code convertToErrorCode() const override { -return make_error_code(Err); - } - - cgdata_error get() const { return Err; } - const std::string &getMessage() const { return Msg; } - - /// Consume an Error and return the raw enum value contained within it, and - /// the optional error message. The Error must either be a success value, or - /// contain a single CGDataError. - static std::pair take(Error E) { -auto Err = cgdat
[llvm-branch-commits] [clang] [AArch64] Add GCS release notes (PR #103866)
github-actions[bot] wrote: @john-brawn-arm (or anyone else). If you would like to add a note about this fix in the release notes (completely optional). Please reply to this comment with a one or two sentence description of the fix. When you are done, please add the release:note label to this PR. https://github.com/llvm/llvm-project/pull/103866 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/19.x: Revert "[CGData] llvm-cgdata (#89884)" (PR #103886)
https://github.com/tru closed https://github.com/llvm/llvm-project/pull/103886 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/19.x: Revert "[CGData] llvm-cgdata (#89884)" (PR #103886)
github-actions[bot] wrote: @amy-kwan (or anyone else). If you would like to add a note about this fix in the release notes (completely optional). Please reply to this comment with a one or two sentence description of the fix. When you are done, please add the release:note label to this PR. https://github.com/llvm/llvm-project/pull/103886 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [clang][driver][clang-cl] Fix unused argument warning for `/std:c++20` for precompiled module inputs to `clang-cl` (PR #102438)
tru wrote: This PR is pretty messy: it has several commits instead of a single one that can be cherry-picked, and a merge commit that makes it harder to cherry-pick and squash. Please update the PR to contain a single commit that fixes the issue and doesn't contain any merge commits, then I can merge it easily. Thanks. https://github.com/llvm/llvm-project/pull/102438 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [openmp] [OpenMP][AArch64] Fix branch protection in microtasks (#102317) (PR #103491)
https://github.com/DanielKristofKiss approved this pull request. Some distros (e.g. Fedora) are built with branch-protection=standard by default. Without this patch, every application and library that links libopenmp.a will not be protected with BTI (CFI protection against JOP attacks). Also, libopenmp.so won't be protected, as not all files are BTI compatible. So from a security point of view this is an issue. Not a regression, as this was the case in previous releases. Not a bug, as the code will work; it just won't be CFI protected. There are systems out there with BTI as of today with such a distro. @tuliom do you have a use case for this? @tru Change is simple and straightforward. I support the backport but leave it up to you. HTH https://github.com/llvm/llvm-project/pull/103491 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [AArch64] Adopt updated B16B16 target flags (PR #104602)
https://github.com/tru updated https://github.com/llvm/llvm-project/pull/104602 >From 02cafa895c917a4b1726e64a5870877c95826be4 Mon Sep 17 00:00:00 2001 From: Spencer Abson Date: Fri, 16 Aug 2024 14:39:43 + Subject: [PATCH] [AArch64] Adopt updated B16B16 target flags The enablement of SVE/SME non-widening BFloat16 instructions was recently changed in response to an architecture update, in which: - FEAT_SVE_B16B16 was weakened - FEAT_SME_B16B16 was introduced New flags, 'sve-b16b16' and 'sme-b16b16' were introduced to replace the existing 'b16b16'. This was achieved in the two patches below. - https://github.com/llvm/llvm-project/pull/101480 - https://github.com/llvm/llvm-project/pull/102501 Ideally, the interface change introduced here will be valid in LLVM-19. We do not see it necessary to back-port the entire change, but just to add 'sme-b16b16' and 'sve-b16b16' as aliases to the existing (and unchanged) 'b16b16' and 'sme2' flags which together cover all of these features. The predication of Bf16 variants of svmin/svminnm and svmax/svmaxnm is also fixed in this change. 
--- clang/include/clang/Basic/arm_sve.td | 26 +++ .../print-supported-extensions-aarch64.c | 2 ++ llvm/lib/Target/AArch64/AArch64Features.td| 9 +++ .../TargetParser/TargetParserTest.cpp | 15 ++- 4 files changed, 46 insertions(+), 6 deletions(-) diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 94c093d8911562..fb11d743fd6479 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -2116,7 +2116,7 @@ def SVFCLAMP_BF : SInst<"svclamp[_{d}]", "", "b", MergeNone, "aarch64_sve_ multiclass MinMaxIntr { def SVS # NAME : SInst<"sv" # i # "[" # zm # "_{d}_" # mul # "]", t, "csil", MergeNone, "aarch64_sve_s" # i # zm # "_" # mul, [IsStreaming], []>; def SVU # NAME : SInst<"sv" # i # "[" # zm # "_{d}_" # mul # "]", t, "UcUsUiUl", MergeNone, "aarch64_sve_u" # i # zm # "_" # mul, [IsStreaming], []>; - def SVF # NAME : SInst<"sv" # i # "[" # zm # "_{d}_" # mul # "]", t, "bhfd", MergeNone, "aarch64_sve_f" # i # zm # "_" # mul, [IsStreaming], []>; + def SVF # NAME : SInst<"sv" # i # "[" # zm # "_{d}_" # mul # "]", t, "hfd", MergeNone, "aarch64_sve_f" # i # zm # "_" # mul, [IsStreaming], []>; } let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2" in { @@ -2134,11 +2134,11 @@ let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2" in { } multiclass SInstMinMaxByVector { - def NAME # _SINGLE_X2 : SInst<"sv" # name # "nm[_single_{d}_x2]", "22d", "bhfd", MergeNone, "aarch64_sve_f" # name # "nm_single_x2", [IsStreaming], []>; - def NAME # _SINGLE_X4 : SInst<"sv" # name # "nm[_single_{d}_x4]", "44d", "bhfd", MergeNone, "aarch64_sve_f" # name # "nm_single_x4", [IsStreaming], []>; + def NAME # _SINGLE_X2 : SInst<"sv" # name # "nm[_single_{d}_x2]", "22d", "hfd", MergeNone, "aarch64_sve_f" # name # "nm_single_x2", [IsStreaming], []>; + def NAME # _SINGLE_X4 : SInst<"sv" # name # "nm[_single_{d}_x4]", "44d", "hfd", MergeNone, "aarch64_sve_f" # name # "nm_single_x4", [IsStreaming], []>; - def NAME # _X2 : 
SInst<"sv" # name # "nm[_{d}_x2]", "222", "bhfd", MergeNone, "aarch64_sve_f" # name # "nm_x2", [IsStreaming], []>; - def NAME # _X4 : SInst<"sv" # name # "nm[_{d}_x4]", "444", "bhfd", MergeNone, "aarch64_sve_f" # name # "nm_x4", [IsStreaming], []>; + def NAME # _X2 : SInst<"sv" # name # "nm[_{d}_x2]", "222", "hfd", MergeNone, "aarch64_sve_f" # name # "nm_x2", [IsStreaming], []>; + def NAME # _X4 : SInst<"sv" # name # "nm[_{d}_x4]", "444", "hfd", MergeNone, "aarch64_sve_f" # name # "nm_x4", [IsStreaming], []>; } let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2" in { @@ -2172,9 +2172,25 @@ let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2" in { def SVFCLAMP_X4 : SInst<"svclamp[_single_{d}_x4]", "44dd", "hfd", MergeNone, "aarch64_sve_fclamp_single_x4", [IsStreaming], []>; } +multiclass BfSingleMultiVector { + def NAME # _SINGLE_X2 : SInst<"sv" # name # "[_single_{d}_x2]", "22d", "b", MergeNone, "aarch64_sve_f" # name # "_single_x2", [IsStreaming], []>; + def NAME # _SINGLE_X4 : SInst<"sv" # name # "[_single_{d}_x4]", "44d", "b", MergeNone, "aarch64_sve_f" # name # "_single_x4", [IsStreaming], []>; + + def NAME # _X2 : SInst<"sv" # name # "[_{d}_x2]", "222", "b", MergeNone, "aarch64_sve_f" # name # "_x2", [IsStreaming], []>; + def NAME # _X4 : SInst<"sv" # name # "[_{d}_x4]", "444", "b", MergeNone, "aarch64_sve_f" # name # "_x4", [IsStreaming], []>; +} + let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2,b16b16"in { def SVBFCLAMP_X2 : SInst<"svclamp[_single_{d}_x2]", "22dd", "b", MergeNone, "aarch64_sve_bfclamp_single_x2", [IsStreaming], []>; def SVBFCLAMP_X4 : SInst<"svclamp[_single_{d}_x4]", "44dd", "b", MergeNone, "aarch64_sve_bfclamp_single_x4
[llvm-branch-commits] [clang] 02cafa8 - [AArch64] Adopt updated B16B16 target flags
Author: Spencer Abson Date: 2024-08-19T09:15:08+02:00 New Revision: 02cafa895c917a4b1726e64a5870877c95826be4 URL: https://github.com/llvm/llvm-project/commit/02cafa895c917a4b1726e64a5870877c95826be4 DIFF: https://github.com/llvm/llvm-project/commit/02cafa895c917a4b1726e64a5870877c95826be4.diff LOG: [AArch64] Adopt updated B16B16 target flags The enablement of SVE/SME non-widening BFloat16 instructions was recently changed in response to an architecture update, in which: - FEAT_SVE_B16B16 was weakened - FEAT_SME_B16B16 was introduced New flags, 'sve-b16b16' and 'sme-b16b16' were introduced to replace the existing 'b16b16'. This was achieved in the two patches below. - https://github.com/llvm/llvm-project/pull/101480 - https://github.com/llvm/llvm-project/pull/102501 Ideally, the interface change introduced here will be valid in LLVM-19. We do not see it necessary to back-port the entire change, but just to add 'sme-b16b16' and 'sve-b16b16' as aliases to the existing (and unchanged) 'b16b16' and 'sme2' flags which together cover all of these features. The predication of Bf16 variants of svmin/svminnm and svmax/svmaxnm is also fixed in this change. 
Added: Modified: clang/include/clang/Basic/arm_sve.td clang/test/Driver/print-supported-extensions-aarch64.c llvm/lib/Target/AArch64/AArch64Features.td llvm/unittests/TargetParser/TargetParserTest.cpp Removed: diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 94c093d8911562..fb11d743fd6479 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -2116,7 +2116,7 @@ def SVFCLAMP_BF : SInst<"svclamp[_{d}]", "", "b", MergeNone, "aarch64_sve_ multiclass MinMaxIntr { def SVS # NAME : SInst<"sv" # i # "[" # zm # "_{d}_" # mul # "]", t, "csil", MergeNone, "aarch64_sve_s" # i # zm # "_" # mul, [IsStreaming], []>; def SVU # NAME : SInst<"sv" # i # "[" # zm # "_{d}_" # mul # "]", t, "UcUsUiUl", MergeNone, "aarch64_sve_u" # i # zm # "_" # mul, [IsStreaming], []>; - def SVF # NAME : SInst<"sv" # i # "[" # zm # "_{d}_" # mul # "]", t, "bhfd", MergeNone, "aarch64_sve_f" # i # zm # "_" # mul, [IsStreaming], []>; + def SVF # NAME : SInst<"sv" # i # "[" # zm # "_{d}_" # mul # "]", t, "hfd", MergeNone, "aarch64_sve_f" # i # zm # "_" # mul, [IsStreaming], []>; } let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2" in { @@ -2134,11 +2134,11 @@ let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2" in { } multiclass SInstMinMaxByVector { - def NAME # _SINGLE_X2 : SInst<"sv" # name # "nm[_single_{d}_x2]", "22d", "bhfd", MergeNone, "aarch64_sve_f" # name # "nm_single_x2", [IsStreaming], []>; - def NAME # _SINGLE_X4 : SInst<"sv" # name # "nm[_single_{d}_x4]", "44d", "bhfd", MergeNone, "aarch64_sve_f" # name # "nm_single_x4", [IsStreaming], []>; + def NAME # _SINGLE_X2 : SInst<"sv" # name # "nm[_single_{d}_x2]", "22d", "hfd", MergeNone, "aarch64_sve_f" # name # "nm_single_x2", [IsStreaming], []>; + def NAME # _SINGLE_X4 : SInst<"sv" # name # "nm[_single_{d}_x4]", "44d", "hfd", MergeNone, "aarch64_sve_f" # name # "nm_single_x4", [IsStreaming], []>; - def NAME # _X2 : SInst<"sv" # name # "nm[_{d}_x2]", 
"222", "bhfd", MergeNone, "aarch64_sve_f" # name # "nm_x2", [IsStreaming], []>; - def NAME # _X4 : SInst<"sv" # name # "nm[_{d}_x4]", "444", "bhfd", MergeNone, "aarch64_sve_f" # name # "nm_x4", [IsStreaming], []>; + def NAME # _X2 : SInst<"sv" # name # "nm[_{d}_x2]", "222", "hfd", MergeNone, "aarch64_sve_f" # name # "nm_x2", [IsStreaming], []>; + def NAME # _X4 : SInst<"sv" # name # "nm[_{d}_x4]", "444", "hfd", MergeNone, "aarch64_sve_f" # name # "nm_x4", [IsStreaming], []>; } let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2" in { @@ -2172,9 +2172,25 @@ let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2" in { def SVFCLAMP_X4 : SInst<"svclamp[_single_{d}_x4]", "44dd", "hfd", MergeNone, "aarch64_sve_fclamp_single_x4", [IsStreaming], []>; } +multiclass BfSingleMultiVector { + def NAME # _SINGLE_X2 : SInst<"sv" # name # "[_single_{d}_x2]", "22d", "b", MergeNone, "aarch64_sve_f" # name # "_single_x2", [IsStreaming], []>; + def NAME # _SINGLE_X4 : SInst<"sv" # name # "[_single_{d}_x4]", "44d", "b", MergeNone, "aarch64_sve_f" # name # "_single_x4", [IsStreaming], []>; + + def NAME # _X2 : SInst<"sv" # name # "[_{d}_x2]", "222", "b", MergeNone, "aarch64_sve_f" # name # "_x2", [IsStreaming], []>; + def NAME # _X4 : SInst<"sv" # name # "[_{d}_x4]", "444", "b", MergeNone, "aarch64_sve_f" # name # "_x4", [IsStreaming], []>; +} + let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2,b16b16"in { def SVBFCLAMP_X2 : SInst<"svclamp[_single_{d}_x2]", "22dd", "b", MergeNone, "aarch64_sve_bfclamp_single_x2", [IsStreaming], []>; def SVBFCLAMP_X4 : SI
[llvm-branch-commits] [clang] [llvm] [AArch64] Adopt updated B16B16 target flags (PR #104602)
https://github.com/tru closed https://github.com/llvm/llvm-project/pull/104602 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [libcxx] release/19.x: [libc++] Fix rejects-valid in std::span copy construction (#104500) (PR #104603)
https://github.com/tru updated https://github.com/llvm/llvm-project/pull/104603 >From 90f2d48965ca8a27f4b814ada987d169ca6a6f44 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Fri, 16 Aug 2024 11:08:34 -0400 Subject: [PATCH] [libc++] Fix rejects-valid in std::span copy construction (#104500) Trying to copy-construct a std::span from another std::span holding an incomplete type would fail as we evaluate the SFINAE for the range-based constructor. The problem was that we checked for __is_std_span after checking for the range being a contiguous_range, which hard-errored because of arithmetic on a pointer to incomplete type. As a drive-by, refactor the whole test and format it. Fixes #104496 (cherry picked from commit 99696b35bc8a0054e0b0c1a26e8dd5049fa8c41b) --- libcxx/include/span | 2 +- .../views/views.span/span.cons/copy.pass.cpp | 126 -- 2 files changed, 86 insertions(+), 42 deletions(-) diff --git a/libcxx/include/span b/libcxx/include/span index 60d76d830f0f31..da631cdc3f90e6 100644 --- a/libcxx/include/span +++ b/libcxx/include/span @@ -206,10 +206,10 @@ struct __is_std_span> : true_type {}; template concept __span_compatible_range = +!__is_std_span>::value &&// ranges::contiguous_range<_Range> && // ranges::sized_range<_Range> && // (ranges::borrowed_range<_Range> || is_const_v<_ElementType>) && // -!__is_std_span>::value &&// !__is_std_array>::value && // !is_array_v> && // is_convertible_v> (*)[], _ElementType (*)[]>; diff --git a/libcxx/test/std/containers/views/views.span/span.cons/copy.pass.cpp b/libcxx/test/std/containers/views/views.span/span.cons/copy.pass.cpp index 28f13e122ddc5e..d3990fd60a459a 100644 --- a/libcxx/test/std/containers/views/views.span/span.cons/copy.pass.cpp +++ b/libcxx/test/std/containers/views/views.span/span.cons/copy.pass.cpp @@ -5,6 +5,7 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===--===// + // UNSUPPORTED: c++03, c++11, c++14, c++17 // @@ -14,58 +15,101 @@ #include #include #include +#include #include 
"test_macros.h" -template -constexpr bool doCopy(const T &rhs) -{ -ASSERT_NOEXCEPT(T{rhs}); -T lhs{rhs}; -return lhs.data() == rhs.data() - &&lhs.size() == rhs.size(); -} +template +constexpr void test() { + ASSERT_NOEXCEPT(std::span(std::declval const&>())); + ASSERT_NOEXCEPT(std::span{std::declval const&>()}); -struct A{}; - -template -void testCV () -{ -int arr[] = {1,2,3}; -assert((doCopy(std::span () ))); -assert((doCopy(std::span() ))); -assert((doCopy(std::span (&arr[0], 1; -assert((doCopy(std::span(&arr[0], 1; -assert((doCopy(std::span (&arr[0], 2; -assert((doCopy(std::span(&arr[0], 2; + // dynamic_extent + { +std::span x; +std::span copy(x); +assert(copy.data() == x.data()); +assert(copy.size() == x.size()); + } + { +T array[3] = {}; +std::span x(array, 3); +std::span copy(x); +assert(copy.data() == array); +assert(copy.size() == 3); + } + { +T array[3] = {}; +std::span x(array, 2); +std::span copy(x); +assert(copy.data() == array); +assert(copy.size() == 2); + } + + // static extent + { +std::span x; +std::span copy(x); +assert(copy.data() == x.data()); +assert(copy.size() == x.size()); + } + { +T array[3] = {}; +std::span x(array); +std::span copy(x); +assert(copy.data() == array); +assert(copy.size() == 3); + } + { +T array[2] = {}; +std::span x(array); +std::span copy(x); +assert(copy.data() == array); +assert(copy.size() == 2); + } } +struct Foo {}; + +constexpr bool test_all() { + test(); + test(); + test(); + test(); -int main(int, char**) -{ -constexpr int carr[] = {1,2,3}; + test(); + test(); + test(); + test(); -static_assert(doCopy(std::span< int> ()),""); -static_assert(doCopy(std::span< int,0>()),""); -static_assert(doCopy(std::span (&carr[0], 1)), ""); -static_assert(doCopy(std::span(&carr[0], 1)), ""); -static_assert(doCopy(std::span (&carr[0], 2)), ""); -static_assert(doCopy(std::span(&carr[0], 2)), ""); + test(); + test(); + test(); + test(); -static_assert(doCopy(std::span()), ""); -static_assert(doCopy(std::span()), ""); 
-static_assert(doCopy(std::span()), ""); + // Note: Can't test non-fundamental types with volatile because we require `T*` to be indirectly_readable, + // which isn't the case when T is volatile. + test(); + test(); -std::string s; -assert(doCopy(std::span () )); -assert(doCopy(std::span() )); -assert(doCopy(std::span (&s, 1))
[llvm-branch-commits] [libcxx] 90f2d48 - [libc++] Fix rejects-valid in std::span copy construction (#104500)
Author: Louis Dionne Date: 2024-08-19T09:17:27+02:00 New Revision: 90f2d48965ca8a27f4b814ada987d169ca6a6f44 URL: https://github.com/llvm/llvm-project/commit/90f2d48965ca8a27f4b814ada987d169ca6a6f44 DIFF: https://github.com/llvm/llvm-project/commit/90f2d48965ca8a27f4b814ada987d169ca6a6f44.diff LOG: [libc++] Fix rejects-valid in std::span copy construction (#104500) Trying to copy-construct a std::span from another std::span holding an incomplete type would fail as we evaluate the SFINAE for the range-based constructor. The problem was that we checked for __is_std_span after checking for the range being a contiguous_range, which hard-errored because of arithmetic on a pointer to incomplete type. As a drive-by, refactor the whole test and format it. Fixes #104496 (cherry picked from commit 99696b35bc8a0054e0b0c1a26e8dd5049fa8c41b) Added: Modified: libcxx/include/span libcxx/test/std/containers/views/views.span/span.cons/copy.pass.cpp Removed: diff --git a/libcxx/include/span b/libcxx/include/span index 60d76d830f0f31..da631cdc3f90e6 100644 --- a/libcxx/include/span +++ b/libcxx/include/span @@ -206,10 +206,10 @@ struct __is_std_span> : true_type {}; template concept __span_compatible_range = +!__is_std_span>::value &&// ranges::contiguous_range<_Range> && // ranges::sized_range<_Range> && // (ranges::borrowed_range<_Range> || is_const_v<_ElementType>) && // -!__is_std_span>::value &&// !__is_std_array>::value && // !is_array_v> && // is_convertible_v> (*)[], _ElementType (*)[]>; diff --git a/libcxx/test/std/containers/views/views.span/span.cons/copy.pass.cpp b/libcxx/test/std/containers/views/views.span/span.cons/copy.pass.cpp index 28f13e122ddc5e..d3990fd60a459a 100644 --- a/libcxx/test/std/containers/views/views.span/span.cons/copy.pass.cpp +++ b/libcxx/test/std/containers/views/views.span/span.cons/copy.pass.cpp @@ -5,6 +5,7 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===--===// + // UNSUPPORTED: c++03, c++11, c++14, c++17 // @@ -14,58 
+15,101 @@ #include #include #include +#include #include "test_macros.h" -template -constexpr bool doCopy(const T &rhs) -{ -ASSERT_NOEXCEPT(T{rhs}); -T lhs{rhs}; -return lhs.data() == rhs.data() - &&lhs.size() == rhs.size(); -} +template +constexpr void test() { + ASSERT_NOEXCEPT(std::span(std::declval const&>())); + ASSERT_NOEXCEPT(std::span{std::declval const&>()}); -struct A{}; - -template -void testCV () -{ -int arr[] = {1,2,3}; -assert((doCopy(std::span () ))); -assert((doCopy(std::span() ))); -assert((doCopy(std::span (&arr[0], 1; -assert((doCopy(std::span(&arr[0], 1; -assert((doCopy(std::span (&arr[0], 2; -assert((doCopy(std::span(&arr[0], 2; + // dynamic_extent + { +std::span x; +std::span copy(x); +assert(copy.data() == x.data()); +assert(copy.size() == x.size()); + } + { +T array[3] = {}; +std::span x(array, 3); +std::span copy(x); +assert(copy.data() == array); +assert(copy.size() == 3); + } + { +T array[3] = {}; +std::span x(array, 2); +std::span copy(x); +assert(copy.data() == array); +assert(copy.size() == 2); + } + + // static extent + { +std::span x; +std::span copy(x); +assert(copy.data() == x.data()); +assert(copy.size() == x.size()); + } + { +T array[3] = {}; +std::span x(array); +std::span copy(x); +assert(copy.data() == array); +assert(copy.size() == 3); + } + { +T array[2] = {}; +std::span x(array); +std::span copy(x); +assert(copy.data() == array); +assert(copy.size() == 2); + } } +struct Foo {}; + +constexpr bool test_all() { + test(); + test(); + test(); + test(); -int main(int, char**) -{ -constexpr int carr[] = {1,2,3}; + test(); + test(); + test(); + test(); -static_assert(doCopy(std::span< int> ()),""); -static_assert(doCopy(std::span< int,0>()),""); -static_assert(doCopy(std::span (&carr[0], 1)), ""); -static_assert(doCopy(std::span(&carr[0], 1)), ""); -static_assert(doCopy(std::span (&carr[0], 2)), ""); -static_assert(doCopy(std::span(&carr[0], 2)), ""); + test(); + test(); + test(); + test(); -static_assert(doCopy(std::span()), ""); 
-static_assert(doCopy(std::span()), ""); -static_assert(doCopy(std::span()), ""); + // Note: Can't test non-fundamental types with volatile because we require `T*` to be indirectly_readable, + // which isn't the case when T is volatile. + test(); + test(); -std::string s; -as
[llvm-branch-commits] [libcxx] release/19.x: [libc++] Fix rejects-valid in std::span copy construction (#104500) (PR #104603)
https://github.com/tru closed https://github.com/llvm/llvm-project/pull/104603 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [AArch64] Adopt updated B16B16 target flags (PR #104602)
github-actions[bot] wrote: @SpencerAbson (or anyone else). If you would like to add a note about this fix in the release notes (completely optional). Please reply to this comment with a one or two sentence description of the fix. When you are done, please add the release:note label to this PR. https://github.com/llvm/llvm-project/pull/104602 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [libcxx] release/19.x: [libc++] Fix rejects-valid in std::span copy construction (#104500) (PR #104603)
github-actions[bot] wrote: @ldionne (or anyone else): if you would like to add a note about this fix in the release notes (completely optional), please reply to this comment with a one- or two-sentence description of the fix. When you are done, please add the release:note label to this PR. https://github.com/llvm/llvm-project/pull/104603 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang-tools-extra] release/19.x: [clang-tidy] Fix crash in C language in readability-non-const-parameter (#100461) (PR #101878)
https://github.com/tru updated https://github.com/llvm/llvm-project/pull/101878 >From b45f75295e3038ef79dce4ac63fbf95b659eebe5 Mon Sep 17 00:00:00 2001 From: Piotr Zegar Date: Thu, 25 Jul 2024 17:26:01 +0200 Subject: [PATCH] [clang-tidy] Fix crash in C language in readability-non-const-parameter (#100461) Fix crash that happen when redeclaration got different number of parameters than definition. Fixes #100340 (cherry picked from commit a27f816fe56af9cc7f4f296ad6c577f6ea64349f) --- .../clang-tidy/readability/NonConstParameterCheck.cpp | 5 - clang-tools-extra/docs/ReleaseNotes.rst | 4 .../checkers/readability/non-const-parameter.c| 11 +++ 3 files changed, 19 insertions(+), 1 deletion(-) create mode 100644 clang-tools-extra/test/clang-tidy/checkers/readability/non-const-parameter.c diff --git a/clang-tools-extra/clang-tidy/readability/NonConstParameterCheck.cpp b/clang-tools-extra/clang-tidy/readability/NonConstParameterCheck.cpp index 95a3a5165e2e82..43b69a24bdb16d 100644 --- a/clang-tools-extra/clang-tidy/readability/NonConstParameterCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/NonConstParameterCheck.cpp @@ -157,9 +157,12 @@ void NonConstParameterCheck::diagnoseNonConstParameters() { if (!Function) continue; unsigned Index = Par->getFunctionScopeIndex(); -for (FunctionDecl *FnDecl : Function->redecls()) +for (FunctionDecl *FnDecl : Function->redecls()) { + if (FnDecl->getNumParams() <= Index) +continue; Fixes.push_back(FixItHint::CreateInsertion( FnDecl->getParamDecl(Index)->getBeginLoc(), "const ")); +} diag(Par->getLocation(), "pointer parameter '%0' can be pointer to const") << Par->getName() << Fixes; diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index 083b098d05d4ae..71461968629868 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -496,6 +496,10 @@ Changes in existing checks ``static_cast``. 
Fixed false positives in C++20 spaceship operator by ignoring casts in implicit and defaulted functions. +- Improved :doc:`readability-non-const-parameter + ` check to not crash when + redeclaration have fewer parameters than expected. + - Improved :doc:`readability-redundant-inline-specifier ` check to properly emit warnings for static data member with an in-class initializer. diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/non-const-parameter.c b/clang-tools-extra/test/clang-tidy/checkers/readability/non-const-parameter.c new file mode 100644 index 00..db50467f3dd94e --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/readability/non-const-parameter.c @@ -0,0 +1,11 @@ +// RUN: %check_clang_tidy %s readability-non-const-parameter %t + +static int f(); + +int f(p) + int *p; +// CHECK-MESSAGES: :[[@LINE-1]]:8: warning: pointer parameter 'p' can be pointer to const [readability-non-const-parameter] +// CHECK-FIXES: {{^}} const int *p;{{$}} +{ +return *p; +} ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang-tools-extra] b45f752 - [clang-tidy] Fix crash in C language in readability-non-const-parameter (#100461)
Author: Piotr Zegar Date: 2024-08-19T09:18:56+02:00 New Revision: b45f75295e3038ef79dce4ac63fbf95b659eebe5 URL: https://github.com/llvm/llvm-project/commit/b45f75295e3038ef79dce4ac63fbf95b659eebe5 DIFF: https://github.com/llvm/llvm-project/commit/b45f75295e3038ef79dce4ac63fbf95b659eebe5.diff LOG: [clang-tidy] Fix crash in C language in readability-non-const-parameter (#100461) Fix crash that happen when redeclaration got different number of parameters than definition. Fixes #100340 (cherry picked from commit a27f816fe56af9cc7f4f296ad6c577f6ea64349f) Added: clang-tools-extra/test/clang-tidy/checkers/readability/non-const-parameter.c Modified: clang-tools-extra/clang-tidy/readability/NonConstParameterCheck.cpp clang-tools-extra/docs/ReleaseNotes.rst Removed: diff --git a/clang-tools-extra/clang-tidy/readability/NonConstParameterCheck.cpp b/clang-tools-extra/clang-tidy/readability/NonConstParameterCheck.cpp index 95a3a5165e2e82..43b69a24bdb16d 100644 --- a/clang-tools-extra/clang-tidy/readability/NonConstParameterCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/NonConstParameterCheck.cpp @@ -157,9 +157,12 @@ void NonConstParameterCheck::diagnoseNonConstParameters() { if (!Function) continue; unsigned Index = Par->getFunctionScopeIndex(); -for (FunctionDecl *FnDecl : Function->redecls()) +for (FunctionDecl *FnDecl : Function->redecls()) { + if (FnDecl->getNumParams() <= Index) +continue; Fixes.push_back(FixItHint::CreateInsertion( FnDecl->getParamDecl(Index)->getBeginLoc(), "const ")); +} diag(Par->getLocation(), "pointer parameter '%0' can be pointer to const") << Par->getName() << Fixes; diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index 083b098d05d4ae..71461968629868 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -496,6 +496,10 @@ Changes in existing checks ``static_cast``. 
Fixed false positives in C++20 spaceship operator by ignoring casts in implicit and defaulted functions. +- Improved :doc:`readability-non-const-parameter + ` check to not crash when + redeclaration have fewer parameters than expected. + - Improved :doc:`readability-redundant-inline-specifier ` check to properly emit warnings for static data member with an in-class initializer. diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/non-const-parameter.c b/clang-tools-extra/test/clang-tidy/checkers/readability/non-const-parameter.c new file mode 100644 index 00..db50467f3dd94e --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/readability/non-const-parameter.c @@ -0,0 +1,11 @@ +// RUN: %check_clang_tidy %s readability-non-const-parameter %t + +static int f(); + +int f(p) + int *p; +// CHECK-MESSAGES: :[[@LINE-1]]:8: warning: pointer parameter 'p' can be pointer to const [readability-non-const-parameter] +// CHECK-FIXES: {{^}} const int *p;{{$}} +{ +return *p; +} ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang-tools-extra] release/19.x: [clang-tidy] Fix crash in C language in readability-non-const-parameter (#100461) (PR #101878)
https://github.com/tru closed https://github.com/llvm/llvm-project/pull/101878 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang-tools-extra] release/19.x: [clang-tidy] Fix crash in C language in readability-non-const-parameter (#100461) (PR #101878)
github-actions[bot] wrote: @nikic (or anyone else): if you would like to add a note about this fix in the release notes (completely optional), please reply to this comment with a one- or two-sentence description of the fix. When you are done, please add the release:note label to this PR. https://github.com/llvm/llvm-project/pull/101878 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [openmp] [OpenMP][AArch64] Fix branch protection in microtasks (#102317) (PR #103491)
tru wrote: Ok - I am always open to accepting things that improve security unless the risk is huge. But it sounds like this will only affect applications that are linking to openmp? Do you see any other risks in accepting this now? https://github.com/llvm/llvm-project/pull/103491 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/19.x: Fix codegen of consteval functions returning an empty class, and related issues (#93115) (PR #102070)
tru wrote: Ping on this one. Can someone review this change? https://github.com/llvm/llvm-project/pull/102070 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/19.x: [AArch64] Add streaming-mode stack hazard optimization remarks (#101695) (PR #102168)
tru wrote: Thanks for the discussion. I am going to allow this since it's pretty contained and has a big upside for certain types of users. I think this really skirts the line; in the future, I hope things like this can hit the main branch before the branching. https://github.com/llvm/llvm-project/pull/102168 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/19.x: [AArch64] Add streaming-mode stack hazard optimization remarks (#101695) (PR #102168)
https://github.com/tru updated https://github.com/llvm/llvm-project/pull/102168 >From 8fbe69a407b2784c7e9d91a3c69daa9786b14391 Mon Sep 17 00:00:00 2001 From: Hari Limaye Date: Tue, 6 Aug 2024 11:39:01 +0100 Subject: [PATCH] [AArch64] Add streaming-mode stack hazard optimization remarks (#101695) Emit an optimization remark when objects in the stack frame may cause hazards in a streaming mode function. The analysis requires either the `aarch64-stack-hazard-size` or `aarch64-stack-hazard-remark-size` flag to be set by the user, with the former flag taking precedence. (cherry picked from commit a98a0dcf63f54c54c5601a34c9f8c10cde0162d6) --- .../llvm/CodeGen/TargetFrameLowering.h| 6 + llvm/lib/CodeGen/PrologEpilogInserter.cpp | 3 + .../Target/AArch64/AArch64FrameLowering.cpp | 204 +- .../lib/Target/AArch64/AArch64FrameLowering.h | 6 +- .../AArch64/ssve-stack-hazard-remarks.ll | 152 + .../CodeGen/AArch64/sve-stack-frame-layout.ll | 4 +- 6 files changed, 364 insertions(+), 11 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/ssve-stack-hazard-remarks.ll diff --git a/llvm/include/llvm/CodeGen/TargetFrameLowering.h b/llvm/include/llvm/CodeGen/TargetFrameLowering.h index 0656c0d739fdfa..d8c9d0a432ad8f 100644 --- a/llvm/include/llvm/CodeGen/TargetFrameLowering.h +++ b/llvm/include/llvm/CodeGen/TargetFrameLowering.h @@ -15,6 +15,7 @@ #include "llvm/ADT/BitVector.h" #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" #include "llvm/Support/TypeSize.h" #include @@ -473,6 +474,11 @@ class TargetFrameLowering { /// Return the frame base information to be encoded in the DWARF subprogram /// debug info. virtual DwarfFrameBase getDwarfFrameBase(const MachineFunction &MF) const; + + /// This method is called at the end of prolog/epilog code insertion, so + /// targets can emit remarks based on the final frame layout. 
+ virtual void emitRemarks(const MachineFunction &MF, + MachineOptimizationRemarkEmitter *ORE) const {}; }; } // End llvm namespace diff --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp index cd5d877e53d827..f4490873cfdcdb 100644 --- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp +++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp @@ -341,6 +341,9 @@ bool PEI::runOnMachineFunction(MachineFunction &MF) { << ore::NV("Function", MF.getFunction().getName()) << "'"; }); + // Emit any remarks implemented for the target, based on final frame layout. + TFI->emitRemarks(MF, ORE); + delete RS; SaveBlocks.clear(); RestoreBlocks.clear(); diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index bd530903bb664a..ba46ededc63a83 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -240,6 +240,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormatVariadic.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" @@ -275,6 +276,10 @@ cl::opt EnableHomogeneousPrologEpilog( // Stack hazard padding size. 0 = disabled. static cl::opt StackHazardSize("aarch64-stack-hazard-size", cl::init(0), cl::Hidden); +// Stack hazard size for analysis remarks. StackHazardSize takes precedence. +static cl::opt +StackHazardRemarkSize("aarch64-stack-hazard-remark-size", cl::init(0), + cl::Hidden); // Whether to insert padding into non-streaming functions (for testing). 
static cl::opt StackHazardInNonStreaming("aarch64-stack-hazard-in-non-streaming", @@ -2615,9 +2620,16 @@ AArch64FrameLowering::getFrameIndexReferenceFromSP(const MachineFunction &MF, const auto &MFI = MF.getFrameInfo(); int64_t ObjectOffset = MFI.getObjectOffset(FI); + StackOffset SVEStackSize = getSVEStackSize(MF); + + // For VLA-area objects, just emit an offset at the end of the stack frame. + // Whilst not quite correct, these objects do live at the end of the frame and + // so it is more useful for analysis for the offset to reflect this. + if (MFI.isVariableSizedObjectIndex(FI)) { +return StackOffset::getFixed(-((int64_t)MFI.getStackSize())) - SVEStackSize; + } // This is correct in the absence of any SVE stack objects. - StackOffset SVEStackSize = getSVEStackSize(MF); if (!SVEStackSize) return StackOffset::getFixed(ObjectOffset - getOffsetOfLocalArea()); @@ -3528,13 +3540,9 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters( return true; } -// Return the FrameID for a Load/Store instruction by looking at the MMO. -static std::optional getLdStFrameID(const MachineInstr &MI, - const MachineF
[llvm-branch-commits] [llvm] 8fbe69a - [AArch64] Add streaming-mode stack hazard optimization remarks (#101695)
Author: Hari Limaye Date: 2024-08-19T09:27:19+02:00 New Revision: 8fbe69a407b2784c7e9d91a3c69daa9786b14391 URL: https://github.com/llvm/llvm-project/commit/8fbe69a407b2784c7e9d91a3c69daa9786b14391 DIFF: https://github.com/llvm/llvm-project/commit/8fbe69a407b2784c7e9d91a3c69daa9786b14391.diff LOG: [AArch64] Add streaming-mode stack hazard optimization remarks (#101695) Emit an optimization remark when objects in the stack frame may cause hazards in a streaming mode function. The analysis requires either the `aarch64-stack-hazard-size` or `aarch64-stack-hazard-remark-size` flag to be set by the user, with the former flag taking precedence. (cherry picked from commit a98a0dcf63f54c54c5601a34c9f8c10cde0162d6) Added: llvm/test/CodeGen/AArch64/ssve-stack-hazard-remarks.ll Modified: llvm/include/llvm/CodeGen/TargetFrameLowering.h llvm/lib/CodeGen/PrologEpilogInserter.cpp llvm/lib/Target/AArch64/AArch64FrameLowering.cpp llvm/lib/Target/AArch64/AArch64FrameLowering.h llvm/test/CodeGen/AArch64/sve-stack-frame-layout.ll Removed: diff --git a/llvm/include/llvm/CodeGen/TargetFrameLowering.h b/llvm/include/llvm/CodeGen/TargetFrameLowering.h index 0656c0d739fdfa..d8c9d0a432ad8f 100644 --- a/llvm/include/llvm/CodeGen/TargetFrameLowering.h +++ b/llvm/include/llvm/CodeGen/TargetFrameLowering.h @@ -15,6 +15,7 @@ #include "llvm/ADT/BitVector.h" #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" #include "llvm/Support/TypeSize.h" #include @@ -473,6 +474,11 @@ class TargetFrameLowering { /// Return the frame base information to be encoded in the DWARF subprogram /// debug info. virtual DwarfFrameBase getDwarfFrameBase(const MachineFunction &MF) const; + + /// This method is called at the end of prolog/epilog code insertion, so + /// targets can emit remarks based on the final frame layout. 
+ virtual void emitRemarks(const MachineFunction &MF, + MachineOptimizationRemarkEmitter *ORE) const {}; }; } // End llvm namespace diff --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp index cd5d877e53d827..f4490873cfdcdb 100644 --- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp +++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp @@ -341,6 +341,9 @@ bool PEI::runOnMachineFunction(MachineFunction &MF) { << ore::NV("Function", MF.getFunction().getName()) << "'"; }); + // Emit any remarks implemented for the target, based on final frame layout. + TFI->emitRemarks(MF, ORE); + delete RS; SaveBlocks.clear(); RestoreBlocks.clear(); diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index bd530903bb664a..ba46ededc63a83 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -240,6 +240,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormatVariadic.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" @@ -275,6 +276,10 @@ cl::opt EnableHomogeneousPrologEpilog( // Stack hazard padding size. 0 = disabled. static cl::opt StackHazardSize("aarch64-stack-hazard-size", cl::init(0), cl::Hidden); +// Stack hazard size for analysis remarks. StackHazardSize takes precedence. +static cl::opt +StackHazardRemarkSize("aarch64-stack-hazard-remark-size", cl::init(0), + cl::Hidden); // Whether to insert padding into non-streaming functions (for testing). 
static cl::opt StackHazardInNonStreaming("aarch64-stack-hazard-in-non-streaming", @@ -2615,9 +2620,16 @@ AArch64FrameLowering::getFrameIndexReferenceFromSP(const MachineFunction &MF, const auto &MFI = MF.getFrameInfo(); int64_t ObjectOffset = MFI.getObjectOffset(FI); + StackOffset SVEStackSize = getSVEStackSize(MF); + + // For VLA-area objects, just emit an offset at the end of the stack frame. + // Whilst not quite correct, these objects do live at the end of the frame and + // so it is more useful for analysis for the offset to reflect this. + if (MFI.isVariableSizedObjectIndex(FI)) { +return StackOffset::getFixed(-((int64_t)MFI.getStackSize())) - SVEStackSize; + } // This is correct in the absence of any SVE stack objects. - StackOffset SVEStackSize = getSVEStackSize(MF); if (!SVEStackSize) return StackOffset::getFixed(ObjectOffset - getOffsetOfLocalArea()); @@ -3528,13 +3540,9 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters( return true; } -// Return the FrameID for a Load/Store instruction by looking at the MMO. -static std::optional getLdStFrameID(const MachineInstr &MI, -
[llvm-branch-commits] [llvm] release/19.x: [AArch64] Add streaming-mode stack hazard optimization remarks (#101695) (PR #102168)
https://github.com/tru closed https://github.com/llvm/llvm-project/pull/102168 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/19.x: Reland [C++20] [Modules] [Itanium ABI] Generate the vtable in the mod… (#102287) (PR #102561)
tru wrote: Any update on this one? https://github.com/llvm/llvm-project/pull/102561 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/19.x: [AArch64] Add streaming-mode stack hazard optimization remarks (#101695) (PR #102168)
github-actions[bot] wrote: @hazzlim (or anyone else): if you would like to add a note about this fix in the release notes (completely optional), please reply to this comment with a one- or two-sentence description of the fix. When you are done, please add the release:note label to this PR. https://github.com/llvm/llvm-project/pull/102168 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/19.x: [clang] Avoid triggering vtable instantiation for C++23 constexpr dtor (#102605) (PR #102924)
https://github.com/tru updated https://github.com/llvm/llvm-project/pull/102924 >From 6e3026883d77124e32a2a7be72c3361fba3e7457 Mon Sep 17 00:00:00 2001 From: Mariya Podchishchaeva Date: Mon, 12 Aug 2024 09:08:46 +0200 Subject: [PATCH] [clang] Avoid triggering vtable instantiation for C++23 constexpr dtor (#102605) In C++23 anything can be constexpr, including a dtor of a class whose members and bases don't have constexpr dtors. Avoid early triggering of vtable instantiation int this case. Fixes https://github.com/llvm/llvm-project/issues/102293 (cherry picked from commit d469794d0cdfd2fea50a6ce0c0e33abb242d744c) --- clang/lib/Sema/SemaDeclCXX.cpp | 29 - clang/test/SemaCXX/gh102293.cpp | 22 ++ 2 files changed, 50 insertions(+), 1 deletion(-) create mode 100644 clang/test/SemaCXX/gh102293.cpp diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp index 66ca62f5d7c4cd..ecf8754143a49e 100644 --- a/clang/lib/Sema/SemaDeclCXX.cpp +++ b/clang/lib/Sema/SemaDeclCXX.cpp @@ -7042,11 +7042,38 @@ void Sema::CheckCompletedCXXClass(Scope *S, CXXRecordDecl *Record) { } } +bool EffectivelyConstexprDestructor = true; +// Avoid triggering vtable instantiation due to a dtor that is not +// "effectively constexpr" for better compatibility. +// See https://github.com/llvm/llvm-project/issues/102293 for more info. +if (isa(M)) { + auto Check = [](QualType T, auto &&Check) -> bool { +const CXXRecordDecl *RD = +T->getBaseElementTypeUnsafe()->getAsCXXRecordDecl(); +if (!RD || !RD->isCompleteDefinition()) + return true; + +if (!RD->hasConstexprDestructor()) + return false; + +for (const CXXBaseSpecifier &B : RD->bases()) + if (!Check(B.getType(), Check)) +return false; +for (const FieldDecl *FD : RD->fields()) + if (!Check(FD->getType(), Check)) +return false; +return true; + }; + EffectivelyConstexprDestructor = + Check(QualType(Record->getTypeForDecl(), 0), Check); +} + // Define defaulted constexpr virtual functions that override a base class // function right away. 
// FIXME: We can defer doing this until the vtable is marked as used. if (CSM != CXXSpecialMemberKind::Invalid && !M->isDeleted() && -M->isDefaulted() && M->isConstexpr() && M->size_overridden_methods()) +M->isDefaulted() && M->isConstexpr() && M->size_overridden_methods() && +EffectivelyConstexprDestructor) DefineDefaultedFunction(*this, M, M->getLocation()); if (!Incomplete) diff --git a/clang/test/SemaCXX/gh102293.cpp b/clang/test/SemaCXX/gh102293.cpp new file mode 100644 index 00..30629fc03bf6a9 --- /dev/null +++ b/clang/test/SemaCXX/gh102293.cpp @@ -0,0 +1,22 @@ +// RUN: %clang_cc1 -std=c++23 -fsyntax-only -verify %s +// expected-no-diagnostics + +template static void destroy() { +T t; +++t; +} + +struct Incomplete; + +template struct HasD { + ~HasD() { destroy(); } +}; + +struct HasVT { + virtual ~HasVT(); +}; + +struct S : HasVT { + HasD<> v; +}; + ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] 6e30268 - [clang] Avoid triggering vtable instantiation for C++23 constexpr dtor (#102605)
Author: Mariya Podchishchaeva Date: 2024-08-19T09:28:25+02:00 New Revision: 6e3026883d77124e32a2a7be72c3361fba3e7457 URL: https://github.com/llvm/llvm-project/commit/6e3026883d77124e32a2a7be72c3361fba3e7457 DIFF: https://github.com/llvm/llvm-project/commit/6e3026883d77124e32a2a7be72c3361fba3e7457.diff LOG: [clang] Avoid triggering vtable instantiation for C++23 constexpr dtor (#102605) In C++23 anything can be constexpr, including a dtor of a class whose members and bases don't have constexpr dtors. Avoid early triggering of vtable instantiation int this case. Fixes https://github.com/llvm/llvm-project/issues/102293 (cherry picked from commit d469794d0cdfd2fea50a6ce0c0e33abb242d744c) Added: clang/test/SemaCXX/gh102293.cpp Modified: clang/lib/Sema/SemaDeclCXX.cpp Removed: diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp index 66ca62f5d7c4cd..ecf8754143a49e 100644 --- a/clang/lib/Sema/SemaDeclCXX.cpp +++ b/clang/lib/Sema/SemaDeclCXX.cpp @@ -7042,11 +7042,38 @@ void Sema::CheckCompletedCXXClass(Scope *S, CXXRecordDecl *Record) { } } +bool EffectivelyConstexprDestructor = true; +// Avoid triggering vtable instantiation due to a dtor that is not +// "effectively constexpr" for better compatibility. +// See https://github.com/llvm/llvm-project/issues/102293 for more info. +if (isa(M)) { + auto Check = [](QualType T, auto &&Check) -> bool { +const CXXRecordDecl *RD = +T->getBaseElementTypeUnsafe()->getAsCXXRecordDecl(); +if (!RD || !RD->isCompleteDefinition()) + return true; + +if (!RD->hasConstexprDestructor()) + return false; + +for (const CXXBaseSpecifier &B : RD->bases()) + if (!Check(B.getType(), Check)) +return false; +for (const FieldDecl *FD : RD->fields()) + if (!Check(FD->getType(), Check)) +return false; +return true; + }; + EffectivelyConstexprDestructor = + Check(QualType(Record->getTypeForDecl(), 0), Check); +} + // Define defaulted constexpr virtual functions that override a base class // function right away. 
// FIXME: We can defer doing this until the vtable is marked as used. if (CSM != CXXSpecialMemberKind::Invalid && !M->isDeleted() && -M->isDefaulted() && M->isConstexpr() && M->size_overridden_methods()) +M->isDefaulted() && M->isConstexpr() && M->size_overridden_methods() && +EffectivelyConstexprDestructor) DefineDefaultedFunction(*this, M, M->getLocation()); if (!Incomplete) diff --git a/clang/test/SemaCXX/gh102293.cpp b/clang/test/SemaCXX/gh102293.cpp new file mode 100644 index 00..30629fc03bf6a9 --- /dev/null +++ b/clang/test/SemaCXX/gh102293.cpp @@ -0,0 +1,22 @@ +// RUN: %clang_cc1 -std=c++23 -fsyntax-only -verify %s +// expected-no-diagnostics + +template static void destroy() { +T t; +++t; +} + +struct Incomplete; + +template struct HasD { + ~HasD() { destroy(); } +}; + +struct HasVT { + virtual ~HasVT(); +}; + +struct S : HasVT { + HasD<> v; +}; + ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/19.x: [clang] Avoid triggering vtable instantiation for C++23 constexpr dtor (#102605) (PR #102924)
https://github.com/tru closed https://github.com/llvm/llvm-project/pull/102924 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/19.x: [clang] Avoid triggering vtable instantiation for C++23 constexpr dtor (#102605) (PR #102924)
github-actions[bot] wrote: @llvmbot (or anyone else): if you would like to add a note about this fix in the release notes (completely optional), please reply to this comment with a one- or two-sentence description of the fix. When you are done, please add the release:note label to this PR. https://github.com/llvm/llvm-project/pull/102924 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang] Introduce custom HLFIR lowering for loops in workshare construct (PR #101445)
https://github.com/ivanradanov edited https://github.com/llvm/llvm-project/pull/101445 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/19.x: [Mips] Fix fast isel for i16 bswap. (#103398) (PR #104745)
https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/104745 Backport ebe7265b142f370f0a563fece5db22f57383ba2d Requested by: @nikic >From 9263d00e6bcbd1408b4c8c5b98b61332460911b5 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 16 Aug 2024 14:54:51 -0700 Subject: [PATCH] [Mips] Fix fast isel for i16 bswap. (#103398) We need to mask the SRL result to 8 bits before ORing in the SLL. This is needed in case bits 23:16 of the input aren't zero. They will have been shifted into bits 15:8. We don't need to AND the result with 0xFFFF. It's ok if the upper 16 bits of the register are garbage. Fixes #103035. (cherry picked from commit ebe7265b142f370f0a563fece5db22f57383ba2d) --- llvm/lib/Target/Mips/MipsFastISel.cpp | 4 ++-- llvm/test/CodeGen/Mips/Fast-ISel/bswap1.ll | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/Mips/MipsFastISel.cpp b/llvm/lib/Target/Mips/MipsFastISel.cpp index bd8ef43da625c3..64a0e9321598ff 100644 --- a/llvm/lib/Target/Mips/MipsFastISel.cpp +++ b/llvm/lib/Target/Mips/MipsFastISel.cpp @@ -1608,8 +1608,8 @@ bool MipsFastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { } emitInst(Mips::SLL, TempReg[0]).addReg(SrcReg).addImm(8); emitInst(Mips::SRL, TempReg[1]).addReg(SrcReg).addImm(8); -emitInst(Mips::OR, TempReg[2]).addReg(TempReg[0]).addReg(TempReg[1]); -emitInst(Mips::ANDi, DestReg).addReg(TempReg[2]).addImm(0xFFFF); +emitInst(Mips::ANDi, TempReg[2]).addReg(TempReg[1]).addImm(0xFF); +emitInst(Mips::OR, DestReg).addReg(TempReg[0]).addReg(TempReg[2]); updateValueMap(II, DestReg); return true; } diff --git a/llvm/test/CodeGen/Mips/Fast-ISel/bswap1.ll b/llvm/test/CodeGen/Mips/Fast-ISel/bswap1.ll index bd762a0e1d741f..ce664c78e86c2a 100644 --- a/llvm/test/CodeGen/Mips/Fast-ISel/bswap1.ll +++ b/llvm/test/CodeGen/Mips/Fast-ISel/bswap1.ll @@ -21,8 +21,8 @@ define void @b16() { ; 32R1: sll $[[TMP1:[0-9]+]], $[[A_VAL]], 8 ; 32R1: srl $[[TMP2:[0-9]+]], $[[A_VAL]], 8 - ; 32R1: or$[[TMP3:[0-9]+]], 
$[[TMP1]], $[[TMP2]] - ; 32R1: andi $[[TMP4:[0-9]+]], $[[TMP3]], 65535 + ; 32R1: andi $[[TMP3:[0-9]+]], $[[TMP2]], 255 + ; 32R1: or$[[RESULT:[0-9]+]], $[[TMP1]], $[[TMP3]] ; 32R2: wsbh $[[RESULT:[0-9]+]], $[[A_VAL]] ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/19.x: [Mips] Fix fast isel for i16 bswap. (#103398) (PR #104745)
https://github.com/llvmbot milestoned https://github.com/llvm/llvm-project/pull/104745 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/19.x: [Mips] Fix fast isel for i16 bswap. (#103398) (PR #104745)
llvmbot wrote: @dtcxzyw What do you think about merging this PR to the release branch? https://github.com/llvm/llvm-project/pull/104745 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang] Introduce custom HLFIR lowering for loops in workshare construct (PR #101445)
https://github.com/ivanradanov updated https://github.com/llvm/llvm-project/pull/101445 >From 38c581c8defc81105160a69bb46a9e489b56f10e Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Wed, 31 Jul 2024 14:12:34 +0900 Subject: [PATCH 1/4] [flang] Introduce ws loop nest generation for HLFIR lowering --- .../flang/Optimizer/Builder/HLFIRTools.h | 12 +++-- flang/lib/Lower/ConvertCall.cpp | 2 +- flang/lib/Lower/OpenMP/ReductionProcessor.cpp | 4 +- flang/lib/Optimizer/Builder/HLFIRTools.cpp| 52 ++- .../HLFIR/Transforms/BufferizeHLFIR.cpp | 3 +- .../LowerHLFIROrderedAssignments.cpp | 30 +-- .../Transforms/OptimizedBufferization.cpp | 6 +-- 7 files changed, 69 insertions(+), 40 deletions(-) diff --git a/flang/include/flang/Optimizer/Builder/HLFIRTools.h b/flang/include/flang/Optimizer/Builder/HLFIRTools.h index 6b41025eea0780..14e42c6f358e46 100644 --- a/flang/include/flang/Optimizer/Builder/HLFIRTools.h +++ b/flang/include/flang/Optimizer/Builder/HLFIRTools.h @@ -357,8 +357,8 @@ hlfir::ElementalOp genElementalOp( /// Structure to describe a loop nest. struct LoopNest { - fir::DoLoopOp outerLoop; - fir::DoLoopOp innerLoop; + mlir::Operation *outerOp; + mlir::Block *body; llvm::SmallVector oneBasedIndices; }; @@ -366,11 +366,13 @@ struct LoopNest { /// \p isUnordered specifies whether the loops in the loop nest /// are unordered. 
LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder, - mlir::ValueRange extents, bool isUnordered = false); + mlir::ValueRange extents, bool isUnordered = false, + bool emitWsLoop = false); inline LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder, -mlir::Value shape, bool isUnordered = false) { +mlir::Value shape, bool isUnordered = false, +bool emitWsLoop = false) { return genLoopNest(loc, builder, getIndexExtents(loc, builder, shape), - isUnordered); + isUnordered, emitWsLoop); } /// Inline the body of an hlfir.elemental at the current insertion point diff --git a/flang/lib/Lower/ConvertCall.cpp b/flang/lib/Lower/ConvertCall.cpp index fd873f55dd844e..0689d6e033dd9c 100644 --- a/flang/lib/Lower/ConvertCall.cpp +++ b/flang/lib/Lower/ConvertCall.cpp @@ -2128,7 +2128,7 @@ class ElementalCallBuilder { hlfir::genLoopNest(loc, builder, shape, !mustBeOrdered); mlir::ValueRange oneBasedIndices = loopNest.oneBasedIndices; auto insPt = builder.saveInsertionPoint(); - builder.setInsertionPointToStart(loopNest.innerLoop.getBody()); + builder.setInsertionPointToStart(loopNest.body); callContext.stmtCtx.pushScope(); for (auto &preparedActual : loweredActuals) if (preparedActual) diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp index c3c1f363033c27..72a90dd0d6f29d 100644 --- a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp +++ b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp @@ -375,7 +375,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, mlir::Location loc, // know this won't miss any opportuinties for clever elemental inlining hlfir::LoopNest nest = hlfir::genLoopNest( loc, builder, shapeShift.getExtents(), /*isUnordered=*/true); - builder.setInsertionPointToStart(nest.innerLoop.getBody()); + builder.setInsertionPointToStart(nest.body); mlir::Type refTy = fir::ReferenceType::get(seqTy.getEleTy()); auto lhsEleAddr = builder.create( loc, refTy, lhs, shapeShift, 
/*slice=*/mlir::Value{}, @@ -389,7 +389,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, mlir::Location loc, builder, loc, redId, refTy, lhsEle, rhsEle); builder.create(loc, scalarReduction, lhsEleAddr); - builder.setInsertionPointAfter(nest.outerLoop); + builder.setInsertionPointAfter(nest.outerOp); builder.create(loc, lhsAddr); } diff --git a/flang/lib/Optimizer/Builder/HLFIRTools.cpp b/flang/lib/Optimizer/Builder/HLFIRTools.cpp index 8d0ae2f195178c..cd07cb741eb4bb 100644 --- a/flang/lib/Optimizer/Builder/HLFIRTools.cpp +++ b/flang/lib/Optimizer/Builder/HLFIRTools.cpp @@ -20,6 +20,7 @@ #include "mlir/IR/IRMapping.h" #include "mlir/Support/LLVM.h" #include "llvm/ADT/TypeSwitch.h" +#include #include // Return explicit extents. If the base is a fir.box, this won't read it to @@ -855,26 +856,51 @@ mlir::Value hlfir::inlineElementalOp( hlfir::LoopNest hlfir::genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder, - mlir::ValueRange extents, bool isUnordered) { + mlir::ValueRange extents, bool isUnordered, + bool emitWsLoop) { hlfir::LoopNest loopNest; assert(!extents.empty() &&
[llvm-branch-commits] [llvm] release/19.x: [SLP]Fix PR104422: Wrong value truncation (PR #104747)
https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/104747 Backport 65ac12d3c9877ecf5b97552364e7eead887d94eb 56140a8258a3498cfcd9f0f05c182457d43cbfd2 Requested by: @nikic >From 92cec47736ec4ba1ef33649660a4fc21bdd8e57e Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Thu, 15 Aug 2024 07:21:10 -0700 Subject: [PATCH 1/2] [SLP][NFC]Add a test with incorrect minbitwidth analysis for reduced operands (cherry picked from commit 65ac12d3c9877ecf5b97552364e7eead887d94eb) --- .../X86/operand-is-reduced-val.ll | 46 +++ 1 file changed, 46 insertions(+) create mode 100644 llvm/test/Transforms/SLPVectorizer/X86/operand-is-reduced-val.ll diff --git a/llvm/test/Transforms/SLPVectorizer/X86/operand-is-reduced-val.ll b/llvm/test/Transforms/SLPVectorizer/X86/operand-is-reduced-val.ll new file mode 100644 index 00..5fb93e27539d8e --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/operand-is-reduced-val.ll @@ -0,0 +1,46 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux < %s -slp-threshold=-10 | FileCheck %s + +define i64 @src(i32 %a) { +; CHECK-LABEL: define i64 @src( +; CHECK-SAME: i32 [[A:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT:[[TMP17:%.*]] = sext i32 [[A]] to i64 +; CHECK-NEXT:[[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i32 0 +; CHECK-NEXT:[[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT:[[TMP3:%.*]] = add <4 x i32> [[TMP2]], +; CHECK-NEXT:[[TMP4:%.*]] = sext <4 x i32> [[TMP3]] to <4 x i64> +; CHECK-NEXT:[[TMP5:%.*]] = and <4 x i32> [[TMP3]], +; CHECK-NEXT:[[TMP6:%.*]] = zext <4 x i32> [[TMP5]] to <4 x i64> +; CHECK-NEXT:[[TMP18:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP6]]) +; CHECK-NEXT:[[TMP16:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP4]]) +; CHECK-NEXT:[[TMP19:%.*]] = add i64 [[TMP18]], [[TMP16]] +; 
CHECK-NEXT:[[OP_RDX1:%.*]] = add i64 [[TMP19]], 4294967297 +; CHECK-NEXT:[[TMP21:%.*]] = add i64 [[OP_RDX1]], [[TMP17]] +; CHECK-NEXT:ret i64 [[TMP21]] +; +entry: + %0 = sext i32 %a to i64 + %1 = add nsw i64 %0, 4294967297 + %2 = sext i32 %a to i64 + %3 = add nsw i64 %2, 4294967297 + %4 = add i64 %3, %1 + %5 = and i64 %3, 1 + %6 = add i64 %4, %5 + %7 = sext i32 %a to i64 + %8 = add nsw i64 %7, 4294967297 + %9 = add i64 %8, %6 + %10 = and i64 %8, 1 + %11 = add i64 %9, %10 + %12 = sext i32 %a to i64 + %13 = add nsw i64 %12, 4294967297 + %14 = add i64 %13, %11 + %15 = and i64 %13, 1 + %16 = add i64 %14, %15 + %17 = sext i32 %a to i64 + %18 = add nsw i64 %17, 4294967297 + %19 = add i64 %18, %16 + %20 = and i64 %18, 1 + %21 = add i64 %19, %20 + ret i64 %21 +} >From 60b6cb6403168fbb62f1dd79083ac768d747edb9 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Thu, 15 Aug 2024 07:57:37 -0700 Subject: [PATCH 2/2] [SLP]Fix PR104422: Wrong value truncation The minbitwidth restrictions can be skipped only for immediate reduced values, for other nodes still need to check if external users allow bitwidth reduction. 
Fixes https://github.com/llvm/llvm-project/issues/104422 (cherry picked from commit 56140a8258a3498cfcd9f0f05c182457d43cbfd2) --- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 3 ++- .../SLPVectorizer/X86/operand-is-reduced-val.ll | 17 ++--- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index cca9eeebaa53f0..0cddc510d36dac 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -15211,7 +15211,8 @@ bool BoUpSLP::collectValuesToDemote( if (any_of(E.Scalars, [&](Value *V) { return !all_of(V->users(), [=](User *U) { return getTreeEntry(U) || - (UserIgnoreList && UserIgnoreList->contains(U)) || + (E.Idx == 0 && UserIgnoreList && + UserIgnoreList->contains(U)) || (!isa(U) && U->getType()->isSized() && !U->getType()->isScalableTy() && DL->getTypeSizeInBits(U->getType()) <= BitWidth); diff --git a/llvm/test/Transforms/SLPVectorizer/X86/operand-is-reduced-val.ll b/llvm/test/Transforms/SLPVectorizer/X86/operand-is-reduced-val.ll index 5fb93e27539d8e..5fcac3fbf3bafe 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/operand-is-reduced-val.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/operand-is-reduced-val.ll @@ -8,15 +8,18 @@ define i64 @src(i32 %a) { ; CHECK-NEXT:[[TMP17:%.*]] = sext i32 [[A]] to i64 ; CHECK-NEXT:[[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i32 0 ; CHECK-NEXT:[[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer -; C
[llvm-branch-commits] [llvm] release/19.x: [SLP]Fix PR104422: Wrong value truncation (PR #104747)
https://github.com/llvmbot milestoned https://github.com/llvm/llvm-project/pull/104747 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/19.x: [SLP]Fix PR104422: Wrong value truncation (PR #104747)
llvmbot wrote: @llvm/pr-subscribers-llvm-transforms Author: None (llvmbot) Changes Backport 65ac12d3c9877ecf5b97552364e7eead887d94eb 56140a8258a3498cfcd9f0f05c182457d43cbfd2 Requested by: @nikic --- Full diff: https://github.com/llvm/llvm-project/pull/104747.diff 2 Files Affected: - (modified) llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp (+2-1) - (added) llvm/test/Transforms/SLPVectorizer/X86/operand-is-reduced-val.ll (+49) ``diff diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index cca9eeebaa53f0..0cddc510d36dac 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -15211,7 +15211,8 @@ bool BoUpSLP::collectValuesToDemote( if (any_of(E.Scalars, [&](Value *V) { return !all_of(V->users(), [=](User *U) { return getTreeEntry(U) || - (UserIgnoreList && UserIgnoreList->contains(U)) || + (E.Idx == 0 && UserIgnoreList && + UserIgnoreList->contains(U)) || (!isa(U) && U->getType()->isSized() && !U->getType()->isScalableTy() && DL->getTypeSizeInBits(U->getType()) <= BitWidth); diff --git a/llvm/test/Transforms/SLPVectorizer/X86/operand-is-reduced-val.ll b/llvm/test/Transforms/SLPVectorizer/X86/operand-is-reduced-val.ll new file mode 100644 index 00..5fcac3fbf3bafe --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/operand-is-reduced-val.ll @@ -0,0 +1,49 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux < %s -slp-threshold=-10 | FileCheck %s + +define i64 @src(i32 %a) { +; CHECK-LABEL: define i64 @src( +; CHECK-SAME: i32 [[A:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT:[[TMP17:%.*]] = sext i32 [[A]] to i64 +; CHECK-NEXT:[[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i32 0 +; CHECK-NEXT:[[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT:[[TMP3:%.*]] = 
sext <4 x i32> [[TMP2]] to <4 x i64> +; CHECK-NEXT:[[TMP4:%.*]] = add nsw <4 x i64> [[TMP3]], +; CHECK-NEXT:[[TMP6:%.*]] = and <4 x i64> [[TMP4]], +; CHECK-NEXT:[[TMP18:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP6]]) +; CHECK-NEXT:[[TMP16:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP4]]) +; CHECK-NEXT:[[TMP8:%.*]] = insertelement <2 x i64> poison, i64 [[TMP16]], i32 0 +; CHECK-NEXT:[[TMP9:%.*]] = insertelement <2 x i64> [[TMP8]], i64 [[TMP18]], i32 1 +; CHECK-NEXT:[[TMP10:%.*]] = insertelement <2 x i64> , i64 [[TMP17]], i32 0 +; CHECK-NEXT:[[TMP11:%.*]] = add <2 x i64> [[TMP9]], [[TMP10]] +; CHECK-NEXT:[[TMP12:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0 +; CHECK-NEXT:[[TMP13:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1 +; CHECK-NEXT:[[TMP21:%.*]] = add i64 [[TMP12]], [[TMP13]] +; CHECK-NEXT:ret i64 [[TMP21]] +; +entry: + %0 = sext i32 %a to i64 + %1 = add nsw i64 %0, 4294967297 + %2 = sext i32 %a to i64 + %3 = add nsw i64 %2, 4294967297 + %4 = add i64 %3, %1 + %5 = and i64 %3, 1 + %6 = add i64 %4, %5 + %7 = sext i32 %a to i64 + %8 = add nsw i64 %7, 4294967297 + %9 = add i64 %8, %6 + %10 = and i64 %8, 1 + %11 = add i64 %9, %10 + %12 = sext i32 %a to i64 + %13 = add nsw i64 %12, 4294967297 + %14 = add i64 %13, %11 + %15 = and i64 %13, 1 + %16 = add i64 %14, %15 + %17 = sext i32 %a to i64 + %18 = add nsw i64 %17, 4294967297 + %19 = add i64 %18, %16 + %20 = and i64 %18, 1 + %21 = add i64 %19, %20 + ret i64 %21 +} `` https://github.com/llvm/llvm-project/pull/104747 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] Add some brief LLVM 19 release notes for Pointer Authentication ABI support (PR #104657)
https://github.com/nikic milestoned https://github.com/llvm/llvm-project/pull/104657 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/19.x: [llvm][CodeGen] Address the issue discovered In window scheduling (#101665) (PR #102881)
https://github.com/nikic milestoned https://github.com/llvm/llvm-project/pull/102881 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/19.x: [BOLT] Fix relocations handling (PR #102741)
https://github.com/nikic milestoned https://github.com/llvm/llvm-project/pull/102741 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [AArch64][ARM] Add a release note about _BitInt (PR #101521)
https://github.com/nikic milestoned https://github.com/llvm/llvm-project/pull/101521 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [WIP][flang] Introduce HLFIR lowerings to omp.workshare_loop_nest (PR #104748)
https://github.com/ivanradanov created https://github.com/llvm/llvm-project/pull/104748 WIP I will be adding unit tests and I am considering if we should have integration tests for the entire omp.workshare pipeline. >From 793ae50dd00c4347bea78ca6ecd33783c69de354 Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Sun, 4 Aug 2024 17:33:52 +0900 Subject: [PATCH 1/5] Add workshare loop wrapper lowerings --- .../lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp | 6 -- .../HLFIR/Transforms/OptimizedBufferization.cpp| 10 +++--- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp b/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp index b608677c526310..1848dbe2c7a2c2 100644 --- a/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp +++ b/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp @@ -26,12 +26,13 @@ #include "flang/Optimizer/HLFIR/HLFIRDialect.h" #include "flang/Optimizer/HLFIR/HLFIROps.h" #include "flang/Optimizer/HLFIR/Passes.h" +#include "flang/Optimizer/OpenMP/Passes.h" +#include "mlir/Dialect/OpenMP/OpenMPDialect.h" #include "mlir/IR/Dominance.h" #include "mlir/IR/PatternMatch.h" #include "mlir/Pass/Pass.h" #include "mlir/Pass/PassManager.h" #include "mlir/Transforms/DialectConversion.h" -#include "mlir/Dialect/OpenMP/OpenMPDialect.h" #include "llvm/ADT/TypeSwitch.h" namespace hlfir { @@ -792,7 +793,8 @@ struct ElementalOpConversion // Generate a loop nest looping around the fir.elemental shape and clone // fir.elemental region inside the inner loop. 
hlfir::LoopNest loopNest = -hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered()); +hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered(), + flangomp::shouldUseWorkshareLowering(elemental)); auto insPt = builder.saveInsertionPoint(); builder.setInsertionPointToStart(loopNest.body); auto yield = hlfir::inlineElementalOp(loc, builder, elemental, diff --git a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp index c4aed6b79df923..150e3e91197241 100644 --- a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp +++ b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp @@ -20,6 +20,7 @@ #include "flang/Optimizer/HLFIR/HLFIRDialect.h" #include "flang/Optimizer/HLFIR/HLFIROps.h" #include "flang/Optimizer/HLFIR/Passes.h" +#include "flang/Optimizer/OpenMP/Passes.h" #include "flang/Optimizer/Transforms/Utils.h" #include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/IR/Dominance.h" @@ -482,7 +483,8 @@ llvm::LogicalResult ElementalAssignBufferization::matchAndRewrite( // Generate a loop nest looping around the hlfir.elemental shape and clone // hlfir.elemental region inside the inner loop hlfir::LoopNest loopNest = - hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered()); + hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered(), + flangomp::shouldUseWorkshareLowering(elemental)); builder.setInsertionPointToStart(loopNest.body); auto yield = hlfir::inlineElementalOp(loc, builder, elemental, loopNest.oneBasedIndices); @@ -553,7 +555,8 @@ llvm::LogicalResult BroadcastAssignBufferization::matchAndRewrite( llvm::SmallVector extents = hlfir::getIndexExtents(loc, builder, shape); hlfir::LoopNest loopNest = - hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true); + hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true, + flangomp::shouldUseWorkshareLowering(assign)); builder.setInsertionPointToStart(loopNest.body); auto 
arrayElement = hlfir::getElementAt(loc, builder, lhs, loopNest.oneBasedIndices); @@ -648,7 +651,8 @@ llvm::LogicalResult VariableAssignBufferization::matchAndRewrite( llvm::SmallVector extents = hlfir::getIndexExtents(loc, builder, shape); hlfir::LoopNest loopNest = - hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true); + hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true, + flangomp::shouldUseWorkshareLowering(assign)); builder.setInsertionPointToStart(loopNest.body); auto rhsArrayElement = hlfir::getElementAt(loc, builder, rhs, loopNest.oneBasedIndices); >From d7ba8a1598f517a5a3c8401d22b81b50114112f1 Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Mon, 19 Aug 2024 15:01:31 +0900 Subject: [PATCH 2/5] Bufferize test --- flang/test/HLFIR/bufferize-workshare.fir | 58 1 file changed, 58 insertions(+) create mode 100644 flang/test/HLFIR/bufferize-workshare.fir diff --git a/flang/test/HLFIR/bufferize-workshare.fir b/flang/test/HLFIR/bufferize-workshare.fir new file mode 100644 index 00..86a2f031478dd7 --- /dev
[llvm-branch-commits] [llvm] release/19.x: [Mips] Fix fast isel for i16 bswap. (#103398) (PR #104745)
https://github.com/dtcxzyw approved this pull request. https://github.com/llvm/llvm-project/pull/104745 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [WIP][flang] Introduce HLFIR lowerings to omp.workshare_loop_nest (PR #104748)
https://github.com/ivanradanov edited https://github.com/llvm/llvm-project/pull/104748 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang] Lower omp.workshare to other omp constructs (PR #101446)
https://github.com/ivanradanov edited https://github.com/llvm/llvm-project/pull/101446 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [MLIR][omp] Add omp.workshare op (PR #101443)
https://github.com/ivanradanov ready_for_review https://github.com/llvm/llvm-project/pull/101443 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang][omp] Emit omp.workshare in frontend (PR #101444)
https://github.com/ivanradanov ready_for_review https://github.com/llvm/llvm-project/pull/101444 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang] Introduce custom HLFIR lowering for loops in workshare construct (PR #101445)
https://github.com/ivanradanov ready_for_review https://github.com/llvm/llvm-project/pull/101445 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [MLIR][omp] Add omp.workshare op (PR #101443)
llvmbot wrote: @llvm/pr-subscribers-flang-openmp Author: Ivan R. Ivanov (ivanradanov) Changes 1/4 in stack for workshare implementation 1/4 https://github.com/llvm/llvm-project/pull/101443 2/4 https://github.com/llvm/llvm-project/pull/101444 3/4 https://github.com/llvm/llvm-project/pull/101445 4/4 https://github.com/llvm/llvm-project/pull/101446 --- Full diff: https://github.com/llvm/llvm-project/pull/101443.diff 5 Files Affected: - (modified) mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h (+2) - (modified) mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td (+43) - (modified) mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp (+23) - (modified) mlir/test/Dialect/OpenMP/invalid.mlir (+42) - (modified) mlir/test/Dialect/OpenMP/ops.mlir (+69) ``diff diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h index 38e4d8f245e4fa..896ca9581c3fc8 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h @@ -316,6 +316,8 @@ using TeamsOperands = detail::Clauses; +using WorkshareOperands = detail::Clauses; + using WsloopOperands = detail::Clauses { + let summary = "workshare directive"; + let description = [{ +The workshare construct divides the execution of the enclosed structured +block into separate units of work, and causes the threads of the team to +share the work such that each unit is executed only once by one thread, in +the context of its implicit task + +This operation is used for the intermediate representation of the workshare +block before the work gets divided between the threads. See the flang +LowerWorkshare pass for details. 
+ }] # clausesDescription; + + let builders = [ +OpBuilder<(ins CArg<"const WorkshareOperands &">:$clauses)> + ]; +} + +def WorkshareLoopWrapperOp : OpenMP_Op<"workshare_loop_wrapper", traits = [ +DeclareOpInterfaceMethods, +RecursiveMemoryEffects, SingleBlock + ], singleRegion = true> { + let summary = "contains loop nests to be parallelized by workshare"; + let description = [{ +This operation wraps a loop nest that is marked for dividing into units of +work by an encompassing omp.workshare operation. + }]; + + let builders = [ +OpBuilder<(ins), [{ build($_builder, $_state, {}); }]> + ]; + let assemblyFormat = "$region attr-dict"; + let hasVerifier = 1; +} + //===--===// // Loop Nest //===--===// diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index 11780f84697b15..90f9a19ebe32b5 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -1683,6 +1683,29 @@ LogicalResult SingleOp::verify() { getCopyprivateSyms()); } +//===--===// +// WorkshareOp +//===--===// + +void WorkshareOp::build(OpBuilder &builder, OperationState &state, +const WorkshareOperands &clauses) { + WorkshareOp::build(builder, state, clauses.nowait); +} + +//===--===// +// WorkshareLoopWrapperOp +//===--===// + +LogicalResult WorkshareLoopWrapperOp::verify() { + if (!isWrapper()) +return emitOpError() << "must be a loop wrapper"; + if (getNestedWrapper()) +return emitError() << "nested wrappers not supported"; + if (!(*this)->getParentOfType()) +return emitError() << "must be nested in an omp.workshare"; + return success(); +} + //===--===// // WsloopOp //===--===// diff --git a/mlir/test/Dialect/OpenMP/invalid.mlir b/mlir/test/Dialect/OpenMP/invalid.mlir index 1d1d93f0977588..ee7c448c467cf5 100644 --- a/mlir/test/Dialect/OpenMP/invalid.mlir +++ b/mlir/test/Dialect/OpenMP/invalid.mlir @@ -2383,3 +2383,45 @@ func.func @masked_arg_count_mismatch(%arg0: i32, %arg1: i32) { }) : (i32, i32) -> 
() return } + +// - +func.func @nested_wrapper(%idx : index) { + omp.workshare { +// expected-error @below {{nested wrappers not supported}} +omp.workshare_loop_wrapper { + omp.simd { +omp.loop_nest (%iv) : index = (%idx) to (%idx) step (%idx) { + omp.yield +} +omp.terminator + } + omp.terminator +} +omp.terminator + } + return +} + +// - +func.func @not_wrapper() { + omp.workshare { +// expected-error @below {{must be a loop wrapper}} +omp.workshare_loop_w
[llvm-branch-commits] [mlir] [MLIR][omp] Add omp.workshare op (PR #101443)
llvmbot wrote: @llvm/pr-subscribers-mlir-openmp Author: Ivan R. Ivanov (ivanradanov) Changes 1/4 in stack for workshare implementation 1/4 https://github.com/llvm/llvm-project/pull/101443 2/4 https://github.com/llvm/llvm-project/pull/101444 3/4 https://github.com/llvm/llvm-project/pull/101445 4/4 https://github.com/llvm/llvm-project/pull/101446 --- Full diff: https://github.com/llvm/llvm-project/pull/101443.diff 5 Files Affected: - (modified) mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h (+2) - (modified) mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td (+43) - (modified) mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp (+23) - (modified) mlir/test/Dialect/OpenMP/invalid.mlir (+42) - (modified) mlir/test/Dialect/OpenMP/ops.mlir (+69) ``diff diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h index 38e4d8f245e4fa..896ca9581c3fc8 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h @@ -316,6 +316,8 @@ using TeamsOperands = detail::Clauses; +using WorkshareOperands = detail::Clauses; + using WsloopOperands = detail::Clauses { + let summary = "workshare directive"; + let description = [{ +The workshare construct divides the execution of the enclosed structured +block into separate units of work, and causes the threads of the team to +share the work such that each unit is executed only once by one thread, in +the context of its implicit task + +This operation is used for the intermediate representation of the workshare +block before the work gets divided between the threads. See the flang +LowerWorkshare pass for details. 
+ }] # clausesDescription; + + let builders = [ +OpBuilder<(ins CArg<"const WorkshareOperands &">:$clauses)> + ]; +} + +def WorkshareLoopWrapperOp : OpenMP_Op<"workshare_loop_wrapper", traits = [ +DeclareOpInterfaceMethods, +RecursiveMemoryEffects, SingleBlock + ], singleRegion = true> { + let summary = "contains loop nests to be parallelized by workshare"; + let description = [{ +This operation wraps a loop nest that is marked for dividing into units of +work by an encompassing omp.workshare operation. + }]; + + let builders = [ +OpBuilder<(ins), [{ build($_builder, $_state, {}); }]> + ]; + let assemblyFormat = "$region attr-dict"; + let hasVerifier = 1; +} + //===--===// // Loop Nest //===--===// diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index 11780f84697b15..90f9a19ebe32b5 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -1683,6 +1683,29 @@ LogicalResult SingleOp::verify() { getCopyprivateSyms()); } +//===--===// +// WorkshareOp +//===--===// + +void WorkshareOp::build(OpBuilder &builder, OperationState &state, +const WorkshareOperands &clauses) { + WorkshareOp::build(builder, state, clauses.nowait); +} + +//===--===// +// WorkshareLoopWrapperOp +//===--===// + +LogicalResult WorkshareLoopWrapperOp::verify() { + if (!isWrapper()) +return emitOpError() << "must be a loop wrapper"; + if (getNestedWrapper()) +return emitError() << "nested wrappers not supported"; + if (!(*this)->getParentOfType()) +return emitError() << "must be nested in an omp.workshare"; + return success(); +} + //===--===// // WsloopOp //===--===// diff --git a/mlir/test/Dialect/OpenMP/invalid.mlir b/mlir/test/Dialect/OpenMP/invalid.mlir index 1d1d93f0977588..ee7c448c467cf5 100644 --- a/mlir/test/Dialect/OpenMP/invalid.mlir +++ b/mlir/test/Dialect/OpenMP/invalid.mlir @@ -2383,3 +2383,45 @@ func.func @masked_arg_count_mismatch(%arg0: i32, %arg1: i32) { }) : (i32, i32) -> 
() return } + +// - +func.func @nested_wrapper(%idx : index) { + omp.workshare { +// expected-error @below {{nested wrappers not supported}} +omp.workshare_loop_wrapper { + omp.simd { +omp.loop_nest (%iv) : index = (%idx) to (%idx) step (%idx) { + omp.yield +} +omp.terminator + } + omp.terminator +} +omp.terminator + } + return +} + +// - +func.func @not_wrapper() { + omp.workshare { +// expected-error @below {{must be a loop wrapper}} +omp.workshare_loop_wr
[llvm-branch-commits] [flang] [flang][omp] Emit omp.workshare in frontend (PR #101444)
llvmbot wrote: @llvm/pr-subscribers-flang-openmp Author: Ivan R. Ivanov (ivanradanov) Changes 2/4 1/4 https://github.com/llvm/llvm-project/pull/101443 2/4 https://github.com/llvm/llvm-project/pull/101444 3/4 https://github.com/llvm/llvm-project/pull/101445 4/4 https://github.com/llvm/llvm-project/pull/101446 --- Full diff: https://github.com/llvm/llvm-project/pull/101444.diff 2 Files Affected: - (modified) flang/lib/Lower/OpenMP/OpenMP.cpp (+26-4) - (modified) flang/test/Lower/OpenMP/workshare.f90 (+3-3) ``diff diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 2b1839b5270d4f..f7bc565ea8cbc1 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -1270,6 +1270,15 @@ static void genTaskwaitClauses(lower::AbstractConverter &converter, loc, llvm::omp::Directive::OMPD_taskwait); } +static void genWorkshareClauses(lower::AbstractConverter &converter, +semantics::SemanticsContext &semaCtx, +lower::StatementContext &stmtCtx, +const List &clauses, mlir::Location loc, +mlir::omp::WorkshareOperands &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processNowait(clauseOps); +} + static void genTeamsClauses(lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, lower::StatementContext &stmtCtx, @@ -1890,6 +1899,22 @@ genTaskyieldOp(lower::AbstractConverter &converter, lower::SymMap &symTable, return converter.getFirOpBuilder().create(loc); } +static mlir::omp::WorkshareOp +genWorkshareOp(lower::AbstractConverter &converter, lower::SymMap &symTable, + semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, + mlir::Location loc, const ConstructQueue &queue, + ConstructQueue::iterator item) { + lower::StatementContext stmtCtx; + mlir::omp::WorkshareOperands clauseOps; + genWorkshareClauses(converter, semaCtx, stmtCtx, item->clauses, loc, clauseOps); + + return genOpWithBody( + OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval, 
+llvm::omp::Directive::OMPD_workshare) + .setClauses(&item->clauses), + queue, item, clauseOps); +} + static mlir::omp::TeamsOp genTeamsOp(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, @@ -2249,10 +2274,7 @@ static void genOMPDispatch(lower::AbstractConverter &converter, llvm::omp::getOpenMPDirectiveName(dir) + ")"); // case llvm::omp::Directive::OMPD_workdistribute: case llvm::omp::Directive::OMPD_workshare: -// FIXME: Workshare is not a commonly used OpenMP construct, an -// implementation for this feature will come later. For the codes -// that use this construct, add a single construct for now. -genSingleOp(converter, symTable, semaCtx, eval, loc, queue, item); +genWorkshareOp(converter, symTable, semaCtx, eval, loc, queue, item); break; // Composite constructs diff --git a/flang/test/Lower/OpenMP/workshare.f90 b/flang/test/Lower/OpenMP/workshare.f90 index 1e11677a15e1f0..8e771952f5b6da 100644 --- a/flang/test/Lower/OpenMP/workshare.f90 +++ b/flang/test/Lower/OpenMP/workshare.f90 @@ -6,7 +6,7 @@ subroutine sb1(arr) integer :: arr(:) !CHECK: omp.parallel { !$omp parallel -!CHECK: omp.single { +!CHECK: omp.workshare { !$omp workshare arr = 0 !$omp end workshare @@ -20,7 +20,7 @@ subroutine sb2(arr) integer :: arr(:) !CHECK: omp.parallel { !$omp parallel -!CHECK: omp.single nowait { +!CHECK: omp.workshare nowait { !$omp workshare arr = 0 !$omp end workshare nowait @@ -33,7 +33,7 @@ subroutine sb2(arr) subroutine sb3(arr) integer :: arr(:) !CHECK: omp.parallel { -!CHECK: omp.single { +!CHECK: omp.workshare { !$omp parallel workshare arr = 0 !$omp end parallel workshare `` https://github.com/llvm/llvm-project/pull/101444 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang] Introduce custom loop nest generation for loops in workshare construct (PR #101445)
https://github.com/ivanradanov edited https://github.com/llvm/llvm-project/pull/101445 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang] Introduce custom loop nest generation for loops in workshare construct (PR #101445)
llvmbot wrote: @llvm/pr-subscribers-flang-fir-hlfir Author: Ivan R. Ivanov (ivanradanov) Changes 3/4 1/4 https://github.com/llvm/llvm-project/pull/101443 2/4 https://github.com/llvm/llvm-project/pull/101444 3/4 https://github.com/llvm/llvm-project/pull/101445 4/4 https://github.com/llvm/llvm-project/pull/101446 This alternative loop nest generation is used in 4/4 for the workshare lowering. --- Full diff: https://github.com/llvm/llvm-project/pull/101445.diff 7 Files Affected: - (modified) flang/include/flang/Optimizer/Builder/HLFIRTools.h (+7-5) - (modified) flang/lib/Lower/ConvertCall.cpp (+1-1) - (modified) flang/lib/Lower/OpenMP/ReductionProcessor.cpp (+2-2) - (modified) flang/lib/Optimizer/Builder/HLFIRTools.cpp (+39-13) - (modified) flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp (+2-1) - (modified) flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp (+15-15) - (modified) flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp (+3-3) ``diff diff --git a/flang/include/flang/Optimizer/Builder/HLFIRTools.h b/flang/include/flang/Optimizer/Builder/HLFIRTools.h index 6b41025eea0780..f073f494b3fb21 100644 --- a/flang/include/flang/Optimizer/Builder/HLFIRTools.h +++ b/flang/include/flang/Optimizer/Builder/HLFIRTools.h @@ -357,8 +357,8 @@ hlfir::ElementalOp genElementalOp( /// Structure to describe a loop nest. struct LoopNest { - fir::DoLoopOp outerLoop; - fir::DoLoopOp innerLoop; + mlir::Operation *outerOp = nullptr; + mlir::Block *body = nullptr; llvm::SmallVector oneBasedIndices; }; @@ -366,11 +366,13 @@ struct LoopNest { /// \p isUnordered specifies whether the loops in the loop nest /// are unordered. 
LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder, - mlir::ValueRange extents, bool isUnordered = false); + mlir::ValueRange extents, bool isUnordered = false, + bool emitWorkshareLoop = false); inline LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder, -mlir::Value shape, bool isUnordered = false) { +mlir::Value shape, bool isUnordered = false, +bool emitWorkshareLoop = false) { return genLoopNest(loc, builder, getIndexExtents(loc, builder, shape), - isUnordered); + isUnordered, emitWorkshareLoop); } /// Inline the body of an hlfir.elemental at the current insertion point diff --git a/flang/lib/Lower/ConvertCall.cpp b/flang/lib/Lower/ConvertCall.cpp index fd873f55dd844e..0689d6e033dd9c 100644 --- a/flang/lib/Lower/ConvertCall.cpp +++ b/flang/lib/Lower/ConvertCall.cpp @@ -2128,7 +2128,7 @@ class ElementalCallBuilder { hlfir::genLoopNest(loc, builder, shape, !mustBeOrdered); mlir::ValueRange oneBasedIndices = loopNest.oneBasedIndices; auto insPt = builder.saveInsertionPoint(); - builder.setInsertionPointToStart(loopNest.innerLoop.getBody()); + builder.setInsertionPointToStart(loopNest.body); callContext.stmtCtx.pushScope(); for (auto &preparedActual : loweredActuals) if (preparedActual) diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp index c3c1f363033c27..72a90dd0d6f29d 100644 --- a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp +++ b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp @@ -375,7 +375,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, mlir::Location loc, // know this won't miss any opportuinties for clever elemental inlining hlfir::LoopNest nest = hlfir::genLoopNest( loc, builder, shapeShift.getExtents(), /*isUnordered=*/true); - builder.setInsertionPointToStart(nest.innerLoop.getBody()); + builder.setInsertionPointToStart(nest.body); mlir::Type refTy = fir::ReferenceType::get(seqTy.getEleTy()); auto lhsEleAddr = builder.create( loc, refTy, lhs, 
shapeShift, /*slice=*/mlir::Value{}, @@ -389,7 +389,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, mlir::Location loc, builder, loc, redId, refTy, lhsEle, rhsEle); builder.create(loc, scalarReduction, lhsEleAddr); - builder.setInsertionPointAfter(nest.outerLoop); + builder.setInsertionPointAfter(nest.outerOp); builder.create(loc, lhsAddr); } diff --git a/flang/lib/Optimizer/Builder/HLFIRTools.cpp b/flang/lib/Optimizer/Builder/HLFIRTools.cpp index 8d0ae2f195178c..31378841ed 100644 --- a/flang/lib/Optimizer/Builder/HLFIRTools.cpp +++ b/flang/lib/Optimizer/Builder/HLFIRTools.cpp @@ -20,6 +20,7 @@ #include "mlir/IR/IRMapping.h" #include "mlir/Support/LLVM.h" #include "llvm/ADT/TypeSwitch.h" +#include #include // Return explicit extents. If the base is a fir.box, this won't read it to @@ -855,26 +856,51 @@ mlir::Value hlfir::inlineElementalOp( hlfir::LoopNest hlfir::genLoopNest(mlir::Location loc, fir::FirOpBuilder
[llvm-branch-commits] [flang] [flang] Lower omp.workshare to other omp constructs (PR #101446)
https://github.com/ivanradanov ready_for_review https://github.com/llvm/llvm-project/pull/101446 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang] Lower omp.workshare to other omp constructs (PR #101446)
@@ -2,3 +2,4 @@ add_subdirectory(CodeGen) add_subdirectory(Dialect) add_subdirectory(HLFIR) add_subdirectory(Transforms) +add_subdirectory(OpenMP) ivanradanov wrote: The PR for this is up here: https://github.com/llvm/llvm-project/pull/104732 https://github.com/llvm/llvm-project/pull/101446 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang] Introduce custom loop nest generation for loops in workshare construct (PR #101445)
https://github.com/ivanradanov updated https://github.com/llvm/llvm-project/pull/101445 >From 5e470922405b735d63b4aded76450cc52e94e003 Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Wed, 31 Jul 2024 14:12:34 +0900 Subject: [PATCH 1/4] [flang] Introduce ws loop nest generation for HLFIR lowering --- .../flang/Optimizer/Builder/HLFIRTools.h | 12 +++-- flang/lib/Lower/ConvertCall.cpp | 2 +- flang/lib/Lower/OpenMP/ReductionProcessor.cpp | 4 +- flang/lib/Optimizer/Builder/HLFIRTools.cpp| 52 ++- .../HLFIR/Transforms/BufferizeHLFIR.cpp | 3 +- .../LowerHLFIROrderedAssignments.cpp | 30 +-- .../Transforms/OptimizedBufferization.cpp | 6 +-- 7 files changed, 69 insertions(+), 40 deletions(-) diff --git a/flang/include/flang/Optimizer/Builder/HLFIRTools.h b/flang/include/flang/Optimizer/Builder/HLFIRTools.h index 6b41025eea0780..14e42c6f358e46 100644 --- a/flang/include/flang/Optimizer/Builder/HLFIRTools.h +++ b/flang/include/flang/Optimizer/Builder/HLFIRTools.h @@ -357,8 +357,8 @@ hlfir::ElementalOp genElementalOp( /// Structure to describe a loop nest. struct LoopNest { - fir::DoLoopOp outerLoop; - fir::DoLoopOp innerLoop; + mlir::Operation *outerOp; + mlir::Block *body; llvm::SmallVector oneBasedIndices; }; @@ -366,11 +366,13 @@ struct LoopNest { /// \p isUnordered specifies whether the loops in the loop nest /// are unordered. 
LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder, - mlir::ValueRange extents, bool isUnordered = false); + mlir::ValueRange extents, bool isUnordered = false, + bool emitWsLoop = false); inline LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder, -mlir::Value shape, bool isUnordered = false) { +mlir::Value shape, bool isUnordered = false, +bool emitWsLoop = false) { return genLoopNest(loc, builder, getIndexExtents(loc, builder, shape), - isUnordered); + isUnordered, emitWsLoop); } /// Inline the body of an hlfir.elemental at the current insertion point diff --git a/flang/lib/Lower/ConvertCall.cpp b/flang/lib/Lower/ConvertCall.cpp index fd873f55dd844e..0689d6e033dd9c 100644 --- a/flang/lib/Lower/ConvertCall.cpp +++ b/flang/lib/Lower/ConvertCall.cpp @@ -2128,7 +2128,7 @@ class ElementalCallBuilder { hlfir::genLoopNest(loc, builder, shape, !mustBeOrdered); mlir::ValueRange oneBasedIndices = loopNest.oneBasedIndices; auto insPt = builder.saveInsertionPoint(); - builder.setInsertionPointToStart(loopNest.innerLoop.getBody()); + builder.setInsertionPointToStart(loopNest.body); callContext.stmtCtx.pushScope(); for (auto &preparedActual : loweredActuals) if (preparedActual) diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp index c3c1f363033c27..72a90dd0d6f29d 100644 --- a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp +++ b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp @@ -375,7 +375,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, mlir::Location loc, // know this won't miss any opportuinties for clever elemental inlining hlfir::LoopNest nest = hlfir::genLoopNest( loc, builder, shapeShift.getExtents(), /*isUnordered=*/true); - builder.setInsertionPointToStart(nest.innerLoop.getBody()); + builder.setInsertionPointToStart(nest.body); mlir::Type refTy = fir::ReferenceType::get(seqTy.getEleTy()); auto lhsEleAddr = builder.create( loc, refTy, lhs, shapeShift, 
/*slice=*/mlir::Value{}, @@ -389,7 +389,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, mlir::Location loc, builder, loc, redId, refTy, lhsEle, rhsEle); builder.create(loc, scalarReduction, lhsEleAddr); - builder.setInsertionPointAfter(nest.outerLoop); + builder.setInsertionPointAfter(nest.outerOp); builder.create(loc, lhsAddr); } diff --git a/flang/lib/Optimizer/Builder/HLFIRTools.cpp b/flang/lib/Optimizer/Builder/HLFIRTools.cpp index 8d0ae2f195178c..cd07cb741eb4bb 100644 --- a/flang/lib/Optimizer/Builder/HLFIRTools.cpp +++ b/flang/lib/Optimizer/Builder/HLFIRTools.cpp @@ -20,6 +20,7 @@ #include "mlir/IR/IRMapping.h" #include "mlir/Support/LLVM.h" #include "llvm/ADT/TypeSwitch.h" +#include #include // Return explicit extents. If the base is a fir.box, this won't read it to @@ -855,26 +856,51 @@ mlir::Value hlfir::inlineElementalOp( hlfir::LoopNest hlfir::genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder, - mlir::ValueRange extents, bool isUnordered) { + mlir::ValueRange extents, bool isUnordered, + bool emitWsLoop) { hlfir::LoopNest loopNest; assert(!extents.empty() &&
[llvm-branch-commits] [flang] [flang][omp] Emit omp.workshare in frontend (PR #101444)
https://github.com/ivanradanov updated https://github.com/llvm/llvm-project/pull/101444 >From 63d49e4dcd128b470ee77006c594673203dd2df2 Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Wed, 31 Jul 2024 14:11:47 +0900 Subject: [PATCH 1/2] [flang][omp] Emit omp.workshare in frontend --- flang/lib/Lower/OpenMP/OpenMP.cpp | 30 ++ 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 2b1839b5270d4f..f7bc565ea8cbc1 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -1270,6 +1270,15 @@ static void genTaskwaitClauses(lower::AbstractConverter &converter, loc, llvm::omp::Directive::OMPD_taskwait); } +static void genWorkshareClauses(lower::AbstractConverter &converter, +semantics::SemanticsContext &semaCtx, +lower::StatementContext &stmtCtx, +const List &clauses, mlir::Location loc, +mlir::omp::WorkshareOperands &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processNowait(clauseOps); +} + static void genTeamsClauses(lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, lower::StatementContext &stmtCtx, @@ -1890,6 +1899,22 @@ genTaskyieldOp(lower::AbstractConverter &converter, lower::SymMap &symTable, return converter.getFirOpBuilder().create(loc); } +static mlir::omp::WorkshareOp +genWorkshareOp(lower::AbstractConverter &converter, lower::SymMap &symTable, + semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, + mlir::Location loc, const ConstructQueue &queue, + ConstructQueue::iterator item) { + lower::StatementContext stmtCtx; + mlir::omp::WorkshareOperands clauseOps; + genWorkshareClauses(converter, semaCtx, stmtCtx, item->clauses, loc, clauseOps); + + return genOpWithBody( + OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval, +llvm::omp::Directive::OMPD_workshare) + .setClauses(&item->clauses), + queue, item, clauseOps); +} + static mlir::omp::TeamsOp 
genTeamsOp(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, @@ -2249,10 +2274,7 @@ static void genOMPDispatch(lower::AbstractConverter &converter, llvm::omp::getOpenMPDirectiveName(dir) + ")"); // case llvm::omp::Directive::OMPD_workdistribute: case llvm::omp::Directive::OMPD_workshare: -// FIXME: Workshare is not a commonly used OpenMP construct, an -// implementation for this feature will come later. For the codes -// that use this construct, add a single construct for now. -genSingleOp(converter, symTable, semaCtx, eval, loc, queue, item); +genWorkshareOp(converter, symTable, semaCtx, eval, loc, queue, item); break; // Composite constructs >From 621b01775171a4718fa405f201b58c3dca005e5a Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Sun, 4 Aug 2024 16:02:37 +0900 Subject: [PATCH 2/2] Fix lower test for workshare --- flang/test/Lower/OpenMP/workshare.f90 | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/flang/test/Lower/OpenMP/workshare.f90 b/flang/test/Lower/OpenMP/workshare.f90 index 1e11677a15e1f0..8e771952f5b6da 100644 --- a/flang/test/Lower/OpenMP/workshare.f90 +++ b/flang/test/Lower/OpenMP/workshare.f90 @@ -6,7 +6,7 @@ subroutine sb1(arr) integer :: arr(:) !CHECK: omp.parallel { !$omp parallel -!CHECK: omp.single { +!CHECK: omp.workshare { !$omp workshare arr = 0 !$omp end workshare @@ -20,7 +20,7 @@ subroutine sb2(arr) integer :: arr(:) !CHECK: omp.parallel { !$omp parallel -!CHECK: omp.single nowait { +!CHECK: omp.workshare nowait { !$omp workshare arr = 0 !$omp end workshare nowait @@ -33,7 +33,7 @@ subroutine sb2(arr) subroutine sb3(arr) integer :: arr(:) !CHECK: omp.parallel { -!CHECK: omp.single { +!CHECK: omp.workshare { !$omp parallel workshare arr = 0 !$omp end parallel workshare ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [MLIR][omp] Add omp.workshare op (PR #101443)
https://github.com/ivanradanov updated https://github.com/llvm/llvm-project/pull/101443 >From 604b0293e0574e9d697d4071c2b853a5a27af1e1 Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Wed, 31 Jul 2024 14:09:09 +0900 Subject: [PATCH 1/7] [MLIR][omp] Add omp.workshare op --- .../Dialect/OpenMP/OpenMPClauseOperands.h | 3 +++ mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 22 +++ mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp | 13 +++ 3 files changed, 38 insertions(+) diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h index 38e4d8f245e4fa..d14e5e17afbb08 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h @@ -17,6 +17,7 @@ #include "mlir/IR/BuiltinAttributes.h" #include "llvm/ADT/SmallVector.h" +#include #include "mlir/Dialect/OpenMP/OpenMPOpsEnums.h.inc" @@ -316,6 +317,8 @@ using TeamsOperands = detail::Clauses; +using WorkshareOperands = detail::Clauses; + using WsloopOperands = detail::Clauses { + let summary = "workshare directive"; + let description = [{ +The workshare construct divides the execution of the enclosed structured +block into separate units of work, and causes the threads of the team to +share the work such that each unit is executed only once by one thread, in +the context of its implicit task + }] # clausesDescription; + + let builders = [ +OpBuilder<(ins CArg<"const WorkshareOperands &">:$clauses)> + ]; + + let hasVerifier = 1; +} + //===--===// // Loop Nest //===--===// diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index 11780f84697b15..9a189eb2059e01 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -1683,6 +1683,19 @@ LogicalResult SingleOp::verify() { getCopyprivateSyms()); } +//===--===// +// WorkshareOp +//===--===// + +void WorkshareOp::build(OpBuilder &builder, 
OperationState &state, +const WorkshareOperands &clauses) { + WorkshareOp::build(builder, state, clauses.nowait); +} + +LogicalResult WorkshareOp::verify() { + return (*this)->getRegion(0).getBlocks().size() == 1 ? success() : failure(); +} + //===--===// // WsloopOp //===--===// >From f2fd4f278c23ec99dae3ac44e1c05fcb629f707d Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Fri, 2 Aug 2024 16:10:25 +0900 Subject: [PATCH 2/7] Add custom omp loop wrapper --- mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 11 +++ 1 file changed, 11 insertions(+) diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td index 5199ff50abb959..76f0c472cfdb14 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td @@ -308,6 +308,17 @@ def WorkshareOp : OpenMP_Op<"workshare", clauses = [ let hasVerifier = 1; } +def WorkshareLoopWrapperOp : OpenMP_Op<"workshare_loop_wrapper", traits = [ +DeclareOpInterfaceMethods, +RecursiveMemoryEffects, SingleBlock + ], singleRegion = true> { + let summary = "contains loop nests to be parallelized by workshare"; + + let builders = [ +OpBuilder<(ins), [{ build($_builder, $_state, {}); }]> + ]; +} + //===--===// // Loop Nest //===--===// >From 22c66e6db3997e38254d9848661a38627cd7bb19 Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Fri, 2 Aug 2024 16:08:58 +0900 Subject: [PATCH 3/7] Add recursive memory effects trait to workshare --- mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td index 76f0c472cfdb14..7d1c80333855e7 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td @@ -290,7 +290,9 @@ def SingleOp : OpenMP_Op<"single", traits = [ // 2.8.3 Workshare Construct //===--===// -def WorkshareOp : OpenMP_Op<"workshare", clauses 
= [ +def WorkshareOp : OpenMP_Op<"workshare", traits = [ +RecursiveMemoryEffects, + ], clauses = [ OpenMP_NowaitClause, ], singleRegion = true> { let summar
[llvm-branch-commits] [flang] [flang] Lower omp.workshare to other omp constructs (PR #101446)
@@ -344,6 +345,7 @@ inline void createHLFIRToFIRPassPipeline( pm.addPass(hlfir::createLowerHLFIRIntrinsics()); pm.addPass(hlfir::createBufferizeHLFIR()); pm.addPass(hlfir::createConvertHLFIRtoFIR()); + pm.addPass(flangomp::createLowerWorkshare()); ivanradanov wrote: I opted to keep the rest of the OpenMP passes as they are and have added a bool argument to control whether to run the lower-workshare pass. https://github.com/llvm/llvm-project/pull/101446 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/19.x: [BOLT] Fix relocations handling (PR #102741)
tru wrote: Can we get a review on this so that, if it should be included, it can land before the final release is done? https://github.com/llvm/llvm-project/pull/102741 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/19.x: [BOLT] Fix relocations handling (PR #102741)
yota9 wrote: I hope someone can approve this, as it is a major fix. https://github.com/llvm/llvm-project/pull/102741 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [AArch64] Add a check for invalid default features (#104435) (PR #104752)
https://github.com/tmatheson-arm created https://github.com/llvm/llvm-project/pull/104752 This adds a check that all ExtensionWithMArch which are marked as implied features for an architecture are also present in the list of default features. It doesn't make sense to have something mandatory but not on by default. There were a number of existing cases that violated this rule, and some changes to which features are mandatory (indicated by the Implies field). This resulted in a bug where if a feature was marked as `Implies` but was not added to `DefaultExt`, then for `-march=base_arch+nofeat` the Driver would consider `feat` to have never been added and therefore would do nothing to disable it (no `-target-feature -feat` would be added, but the backend would enable the feature by default because of `Implies`). See clang/test/Driver/aarch64-negative-modifiers-for-default-features.c. Note that the processor definitions do not respect the architecture DefaultExts. These apply only when specifying `-march=`. So when a feature is moved from `Implies` to `DefaultExts` on the Architecture definition, the feature needs to be added to all processor definitions (that are based on that architecture) in order to preserve the existing behaviour. I have checked the TRMs for many cases (see specific commit messages) but in other cases I have just kept the current behaviour and not tried to fix it. >From b523150d05242d9e00dc2dcf1694a1cf7dde088f Mon Sep 17 00:00:00 2001 From: Tomas Matheson Date: Sat, 17 Aug 2024 13:36:40 +0100 Subject: [PATCH] [AArch64] Add a check for invalid default features (#104435) This adds a check that all ExtensionWithMArch which are marked as implied features for an architecture are also present in the list of default features. It doesn't make sense to have something mandatory but not on by default. There were a number of existing cases that violated this rule, and some changes to which features are mandatory (indicated by the Implies field). 
This resulted in a bug where if a feature was marked as `Implies` but was not added to `DefaultExt`, then for `-march=base_arch+nofeat` the Driver would consider `feat` to have never been added and therefore would do nothing to disable it (no `-target-feature -feat` would be added, but the backend would enable the feature by default because of `Implies`). See clang/test/Driver/aarch64-negative-modifiers-for-default-features.c. Note that the processor definitions do not respect the architecture DefaultExts. These apply only when specifying `-march=`. So when a feature is moved from `Implies` to `DefaultExts` on the Architecture definition, the feature needs to be added to all processor definitions (that are based on that architecture) in order to preserve the existing behaviour. I have checked the TRMs for many cases (see specific commit messages) but in other cases I have just kept the current behaviour and not tried to fix it. --- clang/test/CodeGen/aarch64-targetattr.c | 12 +-- ...-negative-modifiers-for-default-features.c | 12 +++ clang/test/Driver/arm-sb.c| 2 +- .../aarch64-apple-a12.c | 1 - .../aarch64-apple-a13.c | 1 - .../aarch64-apple-a14.c | 1 - .../aarch64-apple-a15.c | 1 - .../aarch64-apple-a16.c | 1 - .../aarch64-apple-a17.c | 1 - .../aarch64-apple-m4.c| 2 - .../aarch64-cortex-r82.c | 1 - .../aarch64-cortex-r82ae.c| 1 - llvm/lib/Target/AArch64/AArch64Features.td| 19 ++-- llvm/lib/Target/AArch64/AArch64Processors.td | 46 +++-- llvm/test/MC/AArch64/arm64-system-encoding.s | 2 +- llvm/test/MC/AArch64/armv8.5a-ssbs-error.s| 2 +- llvm/test/MC/AArch64/armv8.5a-ssbs.s | 2 +- .../MC/Disassembler/AArch64/armv8.5a-ssbs.txt | 2 +- .../AArch64/basic-a64-instructions.txt| 2 +- .../TargetParser/TargetParserTest.cpp | 97 +++ llvm/utils/TableGen/ARMTargetDefEmitter.cpp | 32 +- 21 files changed, 156 insertions(+), 84 deletions(-) create mode 100644 clang/test/Driver/aarch64-negative-modifiers-for-default-features.c diff --git a/clang/test/CodeGen/aarch64-targetattr.c 
b/clang/test/CodeGen/aarch64-targetattr.c index 4f891f938b6186..d6227be2ebef83 100644 --- a/clang/test/CodeGen/aarch64-targetattr.c +++ b/clang/test/CodeGen/aarch64-targetattr.c @@ -195,19 +195,19 @@ void minusarch() {} // CHECK: attributes #[[ATTR0]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+crc,+fp-armv8,+lse,+neon,+ras,+rdm,+v8.1a,+v8.2a,+v8a" } // CHECK: attributes #[[ATTR1]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+crc,+fp-armv8,+fullfp16,+lse,+neon,+ras,+rdm,+sve,+v8.1a,+v8.2a,+v8a" } // CHECK: attributes #[[AT
[llvm-branch-commits] [clang] [llvm] [AArch64] Add a check for invalid default features (#104435) (PR #104752)
llvmbot wrote: @llvm/pr-subscribers-clang Author: Tomas Matheson (tmatheson-arm) Changes This adds a check that all ExtensionWithMArch which are marked as implied features for an architecture are also present in the list of default features. It doesn't make sense to have something mandatory but not on by default. There were a number of existing cases that violated this rule, and some changes to which features are mandatory (indicated by the Implies field). This resulted in a bug where if a feature was marked as `Implies` but was not added to `DefaultExt`, then for `-march=base_arch+nofeat` the Driver would consider `feat` to have never been added and therefore would do nothing to disable it (no `-target-feature -feat` would be added, but the backend would enable the feature by default because of `Implies`). See clang/test/Driver/aarch64-negative-modifiers-for-default-features.c. Note that the processor definitions do not respect the architecture DefaultExts. These apply only when specifying `-march=`. So when a feature is moved from `Implies` to `DefaultExts` on the Architecture definition, the feature needs to be added to all processor definitions (that are based on that architecture) in order to preserve the existing behaviour. I have checked the TRMs for many cases (see specific commit messages) but in other cases I have just kept the current behaviour and not tried to fix it. 
--- Patch is 66.28 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/104752.diff 21 Files Affected: - (modified) clang/test/CodeGen/aarch64-targetattr.c (+6-6) - (added) clang/test/Driver/aarch64-negative-modifiers-for-default-features.c (+12) - (modified) clang/test/Driver/arm-sb.c (+1-1) - (modified) clang/test/Driver/print-enabled-extensions/aarch64-apple-a12.c (-1) - (modified) clang/test/Driver/print-enabled-extensions/aarch64-apple-a13.c (-1) - (modified) clang/test/Driver/print-enabled-extensions/aarch64-apple-a14.c (-1) - (modified) clang/test/Driver/print-enabled-extensions/aarch64-apple-a15.c (-1) - (modified) clang/test/Driver/print-enabled-extensions/aarch64-apple-a16.c (-1) - (modified) clang/test/Driver/print-enabled-extensions/aarch64-apple-a17.c (-1) - (modified) clang/test/Driver/print-enabled-extensions/aarch64-apple-m4.c (-2) - (modified) clang/test/Driver/print-enabled-extensions/aarch64-cortex-r82.c (-1) - (modified) clang/test/Driver/print-enabled-extensions/aarch64-cortex-r82ae.c (-1) - (modified) llvm/lib/Target/AArch64/AArch64Features.td (+9-10) - (modified) llvm/lib/Target/AArch64/AArch64Processors.td (+37-9) - (modified) llvm/test/MC/AArch64/arm64-system-encoding.s (+1-1) - (modified) llvm/test/MC/AArch64/armv8.5a-ssbs-error.s (+1-1) - (modified) llvm/test/MC/AArch64/armv8.5a-ssbs.s (+1-1) - (modified) llvm/test/MC/Disassembler/AArch64/armv8.5a-ssbs.txt (+1-1) - (modified) llvm/test/MC/Disassembler/AArch64/basic-a64-instructions.txt (+1-1) - (modified) llvm/unittests/TargetParser/TargetParserTest.cpp (+57-40) - (modified) llvm/utils/TableGen/ARMTargetDefEmitter.cpp (+29-3) ``diff diff --git a/clang/test/CodeGen/aarch64-targetattr.c b/clang/test/CodeGen/aarch64-targetattr.c index 4f891f938b6186..d6227be2ebef83 100644 --- a/clang/test/CodeGen/aarch64-targetattr.c +++ b/clang/test/CodeGen/aarch64-targetattr.c @@ -195,19 +195,19 @@ void minusarch() {} // CHECK: attributes #[[ATTR0]] = { noinline 
nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+crc,+fp-armv8,+lse,+neon,+ras,+rdm,+v8.1a,+v8.2a,+v8a" } // CHECK: attributes #[[ATTR1]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+crc,+fp-armv8,+fullfp16,+lse,+neon,+ras,+rdm,+sve,+v8.1a,+v8.2a,+v8a" } // CHECK: attributes #[[ATTR2]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+crc,+fp-armv8,+fullfp16,+lse,+neon,+ras,+rdm,+sve,+sve2,+v8.1a,+v8.2a,+v8a" } -// CHECK: attributes #[[ATTR3]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+complxnum,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+ras,+rcpc,+rdm,+sve,+sve2,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,+v8a" } -// CHECK: attributes #[[ATTR4]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-a710" "target-features"="+bf16,+complxnum,+crc,+dotprod,+ete,+flagm,+fp-armv8,+fp16fml,+fullfp16,+i8mm,+jsconv,+lse,+mte,+neon,+pauth,+perfmon,+ras,+rcpc,+rdm,+sb,+sve,+sve2,+sve2-bitperm,+trbe,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+v9a" } +// CHECK: attributes #[[ATTR3]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+bti,+ccidx,+complxnum,+crc,+dit,+dotprod,+flagm,
[llvm-branch-commits] [clang] [llvm] [AArch64] Add a check for invalid default features (#104435) (PR #104752)
llvmbot wrote: @llvm/pr-subscribers-mc Author: Tomas Matheson (tmatheson-arm) Changes This adds a check that all ExtensionWithMArch which are marked as implied features for an architecture are also present in the list of default features. It doesn't make sense to have something mandatory but not on by default. There were a number of existing cases that violated this rule, and some changes to which features are mandatory (indicated by the Implies field). This resulted in a bug where if a feature was marked as `Implies` but was not added to `DefaultExt`, then for `-march=base_arch+nofeat` the Driver would consider `feat` to have never been added and therefore would do nothing to disable it (no `-target-feature -feat` would be added, but the backend would enable the feature by default because of `Implies`). See clang/test/Driver/aarch64-negative-modifiers-for-default-features.c. Note that the processor definitions do not respect the architecture DefaultExts. These apply only when specifying `-march=`. So when a feature is moved from `Implies` to `DefaultExts` on the Architecture definition, the feature needs to be added to all processor definitions (that are based on that architecture) in order to preserve the existing behaviour. I have checked the TRMs for many cases (see specific commit messages) but in other cases I have just kept the current behaviour and not tried to fix it. 
--- Patch is 66.28 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/104752.diff 21 Files Affected: - (modified) clang/test/CodeGen/aarch64-targetattr.c (+6-6) - (added) clang/test/Driver/aarch64-negative-modifiers-for-default-features.c (+12) - (modified) clang/test/Driver/arm-sb.c (+1-1) - (modified) clang/test/Driver/print-enabled-extensions/aarch64-apple-a12.c (-1) - (modified) clang/test/Driver/print-enabled-extensions/aarch64-apple-a13.c (-1) - (modified) clang/test/Driver/print-enabled-extensions/aarch64-apple-a14.c (-1) - (modified) clang/test/Driver/print-enabled-extensions/aarch64-apple-a15.c (-1) - (modified) clang/test/Driver/print-enabled-extensions/aarch64-apple-a16.c (-1) - (modified) clang/test/Driver/print-enabled-extensions/aarch64-apple-a17.c (-1) - (modified) clang/test/Driver/print-enabled-extensions/aarch64-apple-m4.c (-2) - (modified) clang/test/Driver/print-enabled-extensions/aarch64-cortex-r82.c (-1) - (modified) clang/test/Driver/print-enabled-extensions/aarch64-cortex-r82ae.c (-1) - (modified) llvm/lib/Target/AArch64/AArch64Features.td (+9-10) - (modified) llvm/lib/Target/AArch64/AArch64Processors.td (+37-9) - (modified) llvm/test/MC/AArch64/arm64-system-encoding.s (+1-1) - (modified) llvm/test/MC/AArch64/armv8.5a-ssbs-error.s (+1-1) - (modified) llvm/test/MC/AArch64/armv8.5a-ssbs.s (+1-1) - (modified) llvm/test/MC/Disassembler/AArch64/armv8.5a-ssbs.txt (+1-1) - (modified) llvm/test/MC/Disassembler/AArch64/basic-a64-instructions.txt (+1-1) - (modified) llvm/unittests/TargetParser/TargetParserTest.cpp (+57-40) - (modified) llvm/utils/TableGen/ARMTargetDefEmitter.cpp (+29-3) ``diff diff --git a/clang/test/CodeGen/aarch64-targetattr.c b/clang/test/CodeGen/aarch64-targetattr.c index 4f891f938b6186..d6227be2ebef83 100644 --- a/clang/test/CodeGen/aarch64-targetattr.c +++ b/clang/test/CodeGen/aarch64-targetattr.c @@ -195,19 +195,19 @@ void minusarch() {} // CHECK: attributes #[[ATTR0]] = { noinline 
nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+crc,+fp-armv8,+lse,+neon,+ras,+rdm,+v8.1a,+v8.2a,+v8a" } // CHECK: attributes #[[ATTR1]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+crc,+fp-armv8,+fullfp16,+lse,+neon,+ras,+rdm,+sve,+v8.1a,+v8.2a,+v8a" } // CHECK: attributes #[[ATTR2]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+crc,+fp-armv8,+fullfp16,+lse,+neon,+ras,+rdm,+sve,+sve2,+v8.1a,+v8.2a,+v8a" } -// CHECK: attributes #[[ATTR3]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+complxnum,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+ras,+rcpc,+rdm,+sve,+sve2,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,+v8a" } -// CHECK: attributes #[[ATTR4]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-a710" "target-features"="+bf16,+complxnum,+crc,+dotprod,+ete,+flagm,+fp-armv8,+fp16fml,+fullfp16,+i8mm,+jsconv,+lse,+mte,+neon,+pauth,+perfmon,+ras,+rcpc,+rdm,+sb,+sve,+sve2,+sve2-bitperm,+trbe,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+v9a" } +// CHECK: attributes #[[ATTR3]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+bti,+ccidx,+complxnum,+crc,+dit,+dotprod,+flagm,+fp
[llvm-branch-commits] [clang] [llvm] [AArch64] Add a check for invalid default features (#104435) (PR #104752)
https://github.com/tmatheson-arm milestoned https://github.com/llvm/llvm-project/pull/104752 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [AArch64] Add a check for invalid default features (#104435) (PR #104752)
https://github.com/tmatheson-arm edited https://github.com/llvm/llvm-project/pull/104752 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [AArch64] Add a check for invalid default features (PR #104752)
https://github.com/tmatheson-arm edited https://github.com/llvm/llvm-project/pull/104752 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [AArch64] Add a check for invalid default features (PR #104752)
https://github.com/tmatheson-arm edited https://github.com/llvm/llvm-project/pull/104752 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [AArch64] Add a check for invalid default features (PR #104752)
https://github.com/jthackray approved this pull request. LGTM. This fix should definitely be in llvm19. https://github.com/llvm/llvm-project/pull/104752 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [AArch64] Add a check for invalid default features (PR #104752)
DavidSpickett wrote: This needs a summary on the impact of not including the change, for folks who are not familiar with Arm's extension details. > This resulted in a bug where if a feature was marked as Implies but was not > added to DefaultExt, then for -march=base_arch+nofeat the Driver would > consider feat to have never been added and therefore would do nothing to > disable it (no -target-feature -feat would be added, but the backend would > enable the feature by default because of Implies). See clang/test/Driver/aarch64-negative-modifiers-for-default-features.c. So I think the impact is: This could result in a binary including instructions from extensions that the user has explicitly requested be disabled. This binary will fault at runtime on hardware that does not have these extensions. https://github.com/llvm/llvm-project/pull/104752 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [AArch64] Add a check for invalid default features (PR #104752)
DavidSpickett wrote: > This adds a check that all ExtensionWithMArch which are marked as implied > features for an architecture are also present in the list of default features. And do I understand correctly that though this PR is titled "Add a check", it also fixes instances that the check discovered? So the backport is primarily to include those fixes rather than the check itself. https://github.com/llvm/llvm-project/pull/104752 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [AArch64] Add a check for invalid default features (PR #104752)
tmatheson-arm wrote: Yes, both of those are correct. https://github.com/llvm/llvm-project/pull/104752 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [AArch64] Add a check for invalid default features (PR #104752)
https://github.com/tmatheson-arm edited https://github.com/llvm/llvm-project/pull/104752 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [AArch64] Fix a bug where user could not disable certain architecture features (PR #104752)
https://github.com/tmatheson-arm edited https://github.com/llvm/llvm-project/pull/104752 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [clang][driver][clang-cl] Fix unused argument warning for `/std:c++20` for precompiled module inputs to `clang-cl` (PR #102438)
https://github.com/sharadhr updated https://github.com/llvm/llvm-project/pull/102438 >From 909706ce5474c40eeb6355233c891cd0fd335347 Mon Sep 17 00:00:00 2001 From: Sharadh Rajaraman Date: Mon, 19 Aug 2024 12:17:58 +0100 Subject: [PATCH] [clang][driver] `TY_ModuleFile` should be a 'CXX' file type --- clang/lib/Driver/Types.cpp | 4 +++- clang/test/Driver/cl-cxx20-modules.cppm | 8 ++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/clang/lib/Driver/Types.cpp b/clang/lib/Driver/Types.cpp index a7b6b9000e1d2b..2b9b391c19c9fd 100644 --- a/clang/lib/Driver/Types.cpp +++ b/clang/lib/Driver/Types.cpp @@ -242,7 +242,9 @@ bool types::isCXX(ID Id) { case TY_CXXHUHeader: case TY_PP_CXXHeaderUnit: case TY_ObjCXXHeader: case TY_PP_ObjCXXHeader: - case TY_CXXModule: case TY_PP_CXXModule: + case TY_CXXModule: + case TY_PP_CXXModule: + case TY_ModuleFile: case TY_PP_CLCXX: case TY_CUDA: case TY_PP_CUDA: case TY_CUDA_DEVICE: case TY_HIP: diff --git a/clang/test/Driver/cl-cxx20-modules.cppm b/clang/test/Driver/cl-cxx20-modules.cppm index 06df929c42342f..43dbf517485a05 100644 --- a/clang/test/Driver/cl-cxx20-modules.cppm +++ b/clang/test/Driver/cl-cxx20-modules.cppm @@ -1,3 +1,6 @@ +// RUN: rm -rf %t +// RUN: split-file %s %t + // RUN: %clang_cl /std:c++20 --precompile -### -- %s 2>&1 | FileCheck --check-prefix=PRECOMPILE %s // PRECOMPILE: -emit-module-interface @@ -6,3 +9,8 @@ // RUN: %clang_cl /std:c++20 --fprebuilt-module-path=. -### -- %s 2>&1 | FileCheck --check-prefix=FPREBUILT %s // FPREBUILT: -fprebuilt-module-path=. + +// RUN: %clang_cl %t/test.pcm /std:c++20 -### 2>&1 | FileCheck --check-prefix=CPP20WARNING %t/test.pcm + +//--- test.pcm +// CPP20WARNING-NOT: clang-cl: warning: argument unused during compilation: '/std:c++20' [-Wunused-command-line-argument] ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [clang][driver][clang-cl] Fix unused argument warning for `/std:c++20` for precompiled module inputs to `clang-cl` (PR #102438)
sharadhr wrote: @tru, thanks for the feedback; I hope this is better. https://github.com/llvm/llvm-project/pull/102438 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [clang][driver][clang-cl] Fix unused argument warning for `/std:c++20` for precompiled module inputs to `clang-cl` (PR #102438)
tru wrote: Looks much better. I'll merge it later. https://github.com/llvm/llvm-project/pull/102438 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [AArch64] Fix a bug where user could not disable certain architecture features (PR #104752)
https://github.com/tmatheson-arm updated https://github.com/llvm/llvm-project/pull/104752 >From b523150d05242d9e00dc2dcf1694a1cf7dde088f Mon Sep 17 00:00:00 2001 From: Tomas Matheson Date: Sat, 17 Aug 2024 13:36:40 +0100 Subject: [PATCH 1/2] [AArch64] Add a check for invalid default features (#104435) This adds a check that all ExtensionWithMArch which are marked as implied features for an architecture are also present in the list of default features. It doesn't make sense to have something mandatory but not on by default. There were a number of existing cases that violated this rule, and some changes to which features are mandatory (indicated by the Implies field). This resulted in a bug where if a feature was marked as `Implies` but was not added to `DefaultExt`, then for `-march=base_arch+nofeat` the Driver would consider `feat` to have never been added and therefore would do nothing to disable it (no `-target-feature -feat` would be added, but the backend would enable the feature by default because of `Implies`). See clang/test/Driver/aarch64-negative-modifiers-for-default-features.c. Note that the processor definitions do not respect the architecture DefaultExts. These apply only when specifying `-march=`. So when a feature is moved from `Implies` to `DefaultExts` on the Architecture definition, the feature needs to be added to all processor definitions (that are based on that architecture) in order to preserve the existing behaviour. I have checked the TRMs for many cases (see specific commit messages) but in other cases I have just kept the current behaviour and not tried to fix it. 
--- clang/test/CodeGen/aarch64-targetattr.c | 12 +-- ...-negative-modifiers-for-default-features.c | 12 +++ clang/test/Driver/arm-sb.c| 2 +- .../aarch64-apple-a12.c | 1 - .../aarch64-apple-a13.c | 1 - .../aarch64-apple-a14.c | 1 - .../aarch64-apple-a15.c | 1 - .../aarch64-apple-a16.c | 1 - .../aarch64-apple-a17.c | 1 - .../aarch64-apple-m4.c| 2 - .../aarch64-cortex-r82.c | 1 - .../aarch64-cortex-r82ae.c| 1 - llvm/lib/Target/AArch64/AArch64Features.td| 19 ++-- llvm/lib/Target/AArch64/AArch64Processors.td | 46 +++-- llvm/test/MC/AArch64/arm64-system-encoding.s | 2 +- llvm/test/MC/AArch64/armv8.5a-ssbs-error.s| 2 +- llvm/test/MC/AArch64/armv8.5a-ssbs.s | 2 +- .../MC/Disassembler/AArch64/armv8.5a-ssbs.txt | 2 +- .../AArch64/basic-a64-instructions.txt| 2 +- .../TargetParser/TargetParserTest.cpp | 97 +++ llvm/utils/TableGen/ARMTargetDefEmitter.cpp | 32 +- 21 files changed, 156 insertions(+), 84 deletions(-) create mode 100644 clang/test/Driver/aarch64-negative-modifiers-for-default-features.c diff --git a/clang/test/CodeGen/aarch64-targetattr.c b/clang/test/CodeGen/aarch64-targetattr.c index 4f891f938b6186..d6227be2ebef83 100644 --- a/clang/test/CodeGen/aarch64-targetattr.c +++ b/clang/test/CodeGen/aarch64-targetattr.c @@ -195,19 +195,19 @@ void minusarch() {} // CHECK: attributes #[[ATTR0]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+crc,+fp-armv8,+lse,+neon,+ras,+rdm,+v8.1a,+v8.2a,+v8a" } // CHECK: attributes #[[ATTR1]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+crc,+fp-armv8,+fullfp16,+lse,+neon,+ras,+rdm,+sve,+v8.1a,+v8.2a,+v8a" } // CHECK: attributes #[[ATTR2]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+crc,+fp-armv8,+fullfp16,+lse,+neon,+ras,+rdm,+sve,+sve2,+v8.1a,+v8.2a,+v8a" } -// CHECK: attributes #[[ATTR3]] = { noinline nounwind optnone "no-trapping-math"="true" 
"stack-protector-buffer-size"="8" "target-features"="+bf16,+complxnum,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+ras,+rcpc,+rdm,+sve,+sve2,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,+v8a" } -// CHECK: attributes #[[ATTR4]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-a710" "target-features"="+bf16,+complxnum,+crc,+dotprod,+ete,+flagm,+fp-armv8,+fp16fml,+fullfp16,+i8mm,+jsconv,+lse,+mte,+neon,+pauth,+perfmon,+ras,+rcpc,+rdm,+sb,+sve,+sve2,+sve2-bitperm,+trbe,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+v9a" } +// CHECK: attributes #[[ATTR3]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+bti,+ccidx,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fp16fml,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+predres,+ras,+rcpc,+rdm,+sb,+ssbs,+sve,+sve2,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,+v8a" } +// CHECK: attributes #[[ATTR4]] = { noinline nounwind optnone "no-
[llvm-branch-commits] [llvm] release/19.x: [AArch64] Add streaming-mode stack hazard optimization remarks (#101695) (PR #102168)
davemgreen wrote: Thanks @tru https://github.com/llvm/llvm-project/pull/102168 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [AArch64] Fix a bug where user could not disable certain architecture features (PR #104752)
https://github.com/tmatheson-arm updated https://github.com/llvm/llvm-project/pull/104752 >From b523150d05242d9e00dc2dcf1694a1cf7dde088f Mon Sep 17 00:00:00 2001 From: Tomas Matheson Date: Sat, 17 Aug 2024 13:36:40 +0100 Subject: [PATCH 1/3] [AArch64] Add a check for invalid default features (#104435) This adds a check that all ExtensionWithMArch which are marked as implied features for an architecture are also present in the list of default features. It doesn't make sense to have something mandatory but not on by default. There were a number of existing cases that violated this rule, and some changes to which features are mandatory (indicated by the Implies field). This resulted in a bug where if a feature was marked as `Implies` but was not added to `DefaultExt`, then for `-march=base_arch+nofeat` the Driver would consider `feat` to have never been added and therefore would do nothing to disable it (no `-target-feature -feat` would be added, but the backend would enable the feature by default because of `Implies`). See clang/test/Driver/aarch64-negative-modifiers-for-default-features.c. Note that the processor definitions do not respect the architecture DefaultExts. These apply only when specifying `-march=`. So when a feature is moved from `Implies` to `DefaultExts` on the Architecture definition, the feature needs to be added to all processor definitions (that are based on that architecture) in order to preserve the existing behaviour. I have checked the TRMs for many cases (see specific commit messages) but in other cases I have just kept the current behaviour and not tried to fix it. 
--- clang/test/CodeGen/aarch64-targetattr.c | 12 +-- ...-negative-modifiers-for-default-features.c | 12 +++ clang/test/Driver/arm-sb.c| 2 +- .../aarch64-apple-a12.c | 1 - .../aarch64-apple-a13.c | 1 - .../aarch64-apple-a14.c | 1 - .../aarch64-apple-a15.c | 1 - .../aarch64-apple-a16.c | 1 - .../aarch64-apple-a17.c | 1 - .../aarch64-apple-m4.c| 2 - .../aarch64-cortex-r82.c | 1 - .../aarch64-cortex-r82ae.c| 1 - llvm/lib/Target/AArch64/AArch64Features.td| 19 ++-- llvm/lib/Target/AArch64/AArch64Processors.td | 46 +++-- llvm/test/MC/AArch64/arm64-system-encoding.s | 2 +- llvm/test/MC/AArch64/armv8.5a-ssbs-error.s| 2 +- llvm/test/MC/AArch64/armv8.5a-ssbs.s | 2 +- .../MC/Disassembler/AArch64/armv8.5a-ssbs.txt | 2 +- .../AArch64/basic-a64-instructions.txt| 2 +- .../TargetParser/TargetParserTest.cpp | 97 +++ llvm/utils/TableGen/ARMTargetDefEmitter.cpp | 32 +- 21 files changed, 156 insertions(+), 84 deletions(-) create mode 100644 clang/test/Driver/aarch64-negative-modifiers-for-default-features.c diff --git a/clang/test/CodeGen/aarch64-targetattr.c b/clang/test/CodeGen/aarch64-targetattr.c index 4f891f938b6186..d6227be2ebef83 100644 --- a/clang/test/CodeGen/aarch64-targetattr.c +++ b/clang/test/CodeGen/aarch64-targetattr.c @@ -195,19 +195,19 @@ void minusarch() {} // CHECK: attributes #[[ATTR0]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+crc,+fp-armv8,+lse,+neon,+ras,+rdm,+v8.1a,+v8.2a,+v8a" } // CHECK: attributes #[[ATTR1]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+crc,+fp-armv8,+fullfp16,+lse,+neon,+ras,+rdm,+sve,+v8.1a,+v8.2a,+v8a" } // CHECK: attributes #[[ATTR2]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+crc,+fp-armv8,+fullfp16,+lse,+neon,+ras,+rdm,+sve,+sve2,+v8.1a,+v8.2a,+v8a" } -// CHECK: attributes #[[ATTR3]] = { noinline nounwind optnone "no-trapping-math"="true" 
"stack-protector-buffer-size"="8" "target-features"="+bf16,+complxnum,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+ras,+rcpc,+rdm,+sve,+sve2,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,+v8a" } -// CHECK: attributes #[[ATTR4]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-a710" "target-features"="+bf16,+complxnum,+crc,+dotprod,+ete,+flagm,+fp-armv8,+fp16fml,+fullfp16,+i8mm,+jsconv,+lse,+mte,+neon,+pauth,+perfmon,+ras,+rcpc,+rdm,+sb,+sve,+sve2,+sve2-bitperm,+trbe,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+v9a" } +// CHECK: attributes #[[ATTR3]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+bti,+ccidx,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fp16fml,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+predres,+ras,+rcpc,+rdm,+sb,+ssbs,+sve,+sve2,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,+v8a" } +// CHECK: attributes #[[ATTR4]] = { noinline nounwind optnone "no-
[llvm-branch-commits] [openmp] [OpenMP][AArch64] Fix branch protection in microtasks (#102317) (PR #103491)
tuliom wrote: > @tuliom do you have usecase for this? My use case is indeed to have Fedora and CentOS/RHEL fully protected against JOP attacks. The lack of BTI support causes [annocheck](https://sourceware.org/annobin/annobin.html/Test-dynamic-tags.html) to report this issue, e.g. https://issues.redhat.com/browse/RHEL-50807 https://github.com/llvm/llvm-project/pull/103491 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [openmp] [OpenMP][AArch64] Fix branch protection in microtasks (#102317) (PR #103491)
tuliom wrote: > But it sounds like this will only affect applications that are linking to > openmp? Correct. > Do you see any other risks of accepting this now? IMHO, no. But if you prefer to delay this to 19.1.1, that looks good to me. https://github.com/llvm/llvm-project/pull/103491 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU][R600] Move R600CodeGenPassBuilder into R600TargetMachine(NFC). (PR #103721)
https://github.com/cdevadas updated https://github.com/llvm/llvm-project/pull/103721 >From f2095f23eaa5c3876bf7f8d5706881e404c5aa1b Mon Sep 17 00:00:00 2001 From: Christudasan Devadasan Date: Wed, 14 Aug 2024 14:18:59 +0530 Subject: [PATCH 1/3] [AMDGPU][R600] Move R600TargetMachine into R600CodeGenPassBuilder(NFC). --- llvm/lib/Target/AMDGPU/CMakeLists.txt | 1 - .../Target/AMDGPU/R600CodeGenPassBuilder.cpp | 149 - .../Target/AMDGPU/R600CodeGenPassBuilder.h| 38 - llvm/lib/Target/AMDGPU/R600ISelLowering.cpp | 3 +- llvm/lib/Target/AMDGPU/R600TargetMachine.cpp | 154 -- 5 files changed, 186 insertions(+), 159 deletions(-) delete mode 100644 llvm/lib/Target/AMDGPU/R600TargetMachine.cpp diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt index f493076f5bb8a3..16186f1f1bbed0 100644 --- a/llvm/lib/Target/AMDGPU/CMakeLists.txt +++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt @@ -137,7 +137,6 @@ add_llvm_target(AMDGPUCodeGen R600Packetizer.cpp R600RegisterInfo.cpp R600Subtarget.cpp - R600TargetMachine.cpp R600TargetTransformInfo.cpp SIAnnotateControlFlow.cpp SIFixSGPRCopies.cpp diff --git a/llvm/lib/Target/AMDGPU/R600CodeGenPassBuilder.cpp b/llvm/lib/Target/AMDGPU/R600CodeGenPassBuilder.cpp index a57b3aa0adb158..1b182e17add9c0 100644 --- a/llvm/lib/Target/AMDGPU/R600CodeGenPassBuilder.cpp +++ b/llvm/lib/Target/AMDGPU/R600CodeGenPassBuilder.cpp @@ -5,12 +5,159 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===--===// +// +/// \file +/// This file contains both AMDGPU-R600 target machine and the CodeGen pass +/// builder. The target machine contains all of the hardware specific +/// information needed to emit code for R600 GPUs and the CodeGen pass builder +/// handles the same for new pass manager infrastructure. 
+// +//===--===// #include "R600CodeGenPassBuilder.h" -#include "R600TargetMachine.h" +#include "R600.h" +#include "R600MachineScheduler.h" +#include "R600TargetTransformInfo.h" +#include "llvm/Transforms/Scalar.h" +#include <optional> using namespace llvm; +static cl::opt<bool> +EnableR600StructurizeCFG("r600-ir-structurize", + cl::desc("Use StructurizeCFG IR pass"), + cl::init(true)); + +static cl::opt<bool> EnableR600IfConvert("r600-if-convert", + cl::desc("Use if conversion pass"), + cl::ReallyHidden, cl::init(true)); + +static cl::opt<bool, true> EnableAMDGPUFunctionCallsOpt( +"amdgpu-function-calls", cl::desc("Enable AMDGPU function call support"), +cl::location(AMDGPUTargetMachine::EnableFunctionCalls), cl::init(true), +cl::Hidden); + +static ScheduleDAGInstrs *createR600MachineScheduler(MachineSchedContext *C) { + return new ScheduleDAGMILive(C, std::make_unique<R600SchedStrategy>()); +} + +static MachineSchedRegistry R600SchedRegistry("r600", + "Run R600's custom scheduler", + createR600MachineScheduler); + +//===--===// +// R600 Target Machine (R600 -> Cayman) - Legacy Pass Manager interface. +//===--===// + +R600TargetMachine::R600TargetMachine(const Target &T, const Triple &TT, + StringRef CPU, StringRef FS, + const TargetOptions &Options, + std::optional<Reloc::Model> RM, + std::optional<CodeModel::Model> CM, + CodeGenOptLevel OL, bool JIT) +: AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) { + setRequiresStructuredCFG(true); + + // Override the default since calls aren't supported for r600. 
+ if (EnableFunctionCalls && + EnableAMDGPUFunctionCallsOpt.getNumOccurrences() == 0) +EnableFunctionCalls = false; +} + +const TargetSubtargetInfo * +R600TargetMachine::getSubtargetImpl(const Function &F) const { + StringRef GPU = getGPUName(F); + StringRef FS = getFeatureString(F); + + SmallString<128> SubtargetKey(GPU); + SubtargetKey.append(FS); + + auto &I = SubtargetMap[SubtargetKey]; + if (!I) { +// This needs to be done before we create a new subtarget since any +// creation will depend on the TM and the code generation flags on the +// function that reside in TargetOptions. +resetTargetOptions(F); +I = std::make_unique<R600Subtarget>(TargetTriple, GPU, FS, *this); + } + + return I.get(); +} + +TargetTransformInfo +R600TargetMachine::getTargetTransformInfo(const Function &F) const { + return TargetTransformInfo(R600TTIImpl(this, F)); +} + +namespace { +class R600PassConfig final : public AMDGPUPassConfig { +pu