https://github.com/shiltian updated https://github.com/llvm/llvm-project/pull/108786
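For orientation, and not part of the patch itself: the change below adds a `U<address space>` component to the datalayout string plus a `DataLayout::getUndesirableAddressSpace()` accessor (see the `DataLayout.h`/`DataLayout.cpp` hunks further down). Here is a minimal C++ sketch, assuming the patch is applied, of parsing a layout string that carries the new component; the helper name and example strings are illustrative only.

  #include "llvm/ADT/StringRef.h"
  #include "llvm/IR/DataLayout.h"
  #include "llvm/Support/Error.h"
  #include <optional>

  using namespace llvm;

  // Sketch only (assumes this patch is applied): parse a layout string that
  // carries the new "U0" component and read it back via the new accessor.
  static std::optional<unsigned> undesirableASOf(StringRef Desc) {
    Expected<DataLayout> DL = DataLayout::parse(Desc);
    if (!DL) {
      consumeError(DL.takeError()); // Malformed layout string.
      return std::nullopt;
    }
    return DL->getUndesirableAddressSpace();
  }

  // undesirableASOf("e-i64:64-i128:128-v16:16-v32:32-n16:32:64-U0") == 0
  // undesirableASOf("e-p:32:32") has no value (no "U" component present).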
>From c2be824d5935b57a87a7ccf618ee4eecf55b4e3a Mon Sep 17 00:00:00 2001 From: Shilei Tian <i...@tianshilei.me> Date: Tue, 17 Sep 2024 21:47:45 -0400 Subject: [PATCH] [IR] Introduce `U<address space>` to `DataLayout` to represent flat address space if a target supports it --- clang/lib/Basic/Targets/AMDGPU.cpp | 2 +- clang/lib/Basic/Targets/NVPTX.cpp | 8 +- clang/test/CodeGen/target-data.c | 8 +- clang/test/CodeGenOpenCL/amdgpu-env-amdgcn.cl | 2 +- lld/test/ELF/lto/amdgcn-oses.ll | 6 +- lld/test/ELF/lto/amdgcn.ll | 2 +- llvm/docs/LangRef.rst | 6 + llvm/docs/ReleaseNotes.rst | 251 ++++++++++++++++++ llvm/include/llvm/IR/DataLayout.h | 4 + llvm/lib/IR/AutoUpgrade.cpp | 10 + llvm/lib/IR/DataLayout.cpp | 11 + .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 2 +- llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp | 2 +- llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp | 6 +- .../Bitcode/DataLayoutUpgradeTest.cpp | 40 +-- .../GPUToROCDL/LowerGpuOpsToROCDLOps.cpp | 2 +- 16 files changed, 322 insertions(+), 40 deletions(-) create mode 100644 llvm/docs/ReleaseNotes.rst diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp index 3b748d0249d57b..9e322cef0d9c38 100644 --- a/clang/lib/Basic/Targets/AMDGPU.cpp +++ b/clang/lib/Basic/Targets/AMDGPU.cpp @@ -36,7 +36,7 @@ static const char *const DataLayoutStringAMDGCN = "-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:" "32-v48:64-v96:128" "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1" - "-ni:7:8:9"; + "-ni:7:8:9-U0"; const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = { llvm::AMDGPUAS::FLAT_ADDRESS, // Default diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp index 88a0dbde52d52b..b70d8d30732327 100644 --- a/clang/lib/Basic/Targets/NVPTX.cpp +++ b/clang/lib/Basic/Targets/NVPTX.cpp @@ -66,12 +66,12 @@ NVPTXTargetInfo::NVPTXTargetInfo(const llvm::Triple &Triple, HasFloat16 = true; if (TargetPointerWidth == 32) - resetDataLayout("e-p:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64"); + resetDataLayout("e-p:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64-U0"); else if (Opts.NVPTXUseShortPointers) - resetDataLayout( - "e-p3:32:32-p4:32:32-p5:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64"); + resetDataLayout("e-p3:32:32-p4:32:32-p5:32:32-i64:64-i128:128-v16:16-v32:" "32-n16:32:64-U0"); else - resetDataLayout("e-i64:64-i128:128-v16:16-v32:32-n16:32:64"); + resetDataLayout("e-i64:64-i128:128-v16:16-v32:32-n16:32:64-U0"); // If possible, get a TargetInfo for our host triple, so we can match its // types. 
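As a usage illustration for the frontend datalayout strings above (again a sketch that assumes the patch is applied, not part of it): once a module's datalayout carries `-U0`, target-independent code can ask `DataLayout` whether a value lives in the address space the target flagged with `U`. The helper below is hypothetical.

  #include "llvm/IR/DataLayout.h"
  #include "llvm/IR/GlobalValue.h"
  #include "llvm/IR/Module.h"
  #include <optional>

  using namespace llvm;

  // Hypothetical helper: true if GV lives in the address space the target
  // marked with "U<n>" (address space 0 for amdgcn and nvptx in this patch).
  static bool isInUndesirableAddrSpace(const GlobalValue &GV) {
    const DataLayout &DL = GV.getParent()->getDataLayout();
    if (std::optional<unsigned> AS = DL.getUndesirableAddressSpace())
      return GV.getAddressSpace() == *AS;
    return false; // Target declared no "U" address space.
  }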
diff --git a/clang/test/CodeGen/target-data.c b/clang/test/CodeGen/target-data.c index 8548aa00cfe877..129d6de4927c43 100644 --- a/clang/test/CodeGen/target-data.c +++ b/clang/test/CodeGen/target-data.c @@ -160,11 +160,11 @@ // RUN: %clang_cc1 -triple nvptx-unknown -o - -emit-llvm %s | \ // RUN: FileCheck %s -check-prefix=NVPTX -// NVPTX: target datalayout = "e-p:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64" +// NVPTX: target datalayout = "e-p:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64-U0" // RUN: %clang_cc1 -triple nvptx64-unknown -o - -emit-llvm %s | \ // RUN: FileCheck %s -check-prefix=NVPTX64 -// NVPTX64: target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64" +// NVPTX64: target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64-U0" // RUN: %clang_cc1 -triple r600-unknown -o - -emit-llvm %s | \ // RUN: FileCheck %s -check-prefix=R600 @@ -176,12 +176,12 @@ // RUN: %clang_cc1 -triple amdgcn-unknown -target-cpu hawaii -o - -emit-llvm %s \ // RUN: | FileCheck %s -check-prefix=R600SI -// R600SI: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" +// R600SI: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9-U0" // Test default -target-cpu // RUN: %clang_cc1 -triple amdgcn-unknown -o - -emit-llvm %s \ // RUN: | FileCheck %s -check-prefix=R600SIDefault -// R600SIDefault: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" +// R600SIDefault: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9-U0" // RUN: %clang_cc1 -triple arm64-unknown -o - -emit-llvm %s | \ // RUN: FileCheck %s -check-prefix=AARCH64 diff --git a/clang/test/CodeGenOpenCL/amdgpu-env-amdgcn.cl b/clang/test/CodeGenOpenCL/amdgpu-env-amdgcn.cl index bb52f87615214b..32427b971aa525 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-env-amdgcn.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-env-amdgcn.cl @@ -1,5 +1,5 @@ // RUN: %clang_cc1 %s -O0 -triple amdgcn -emit-llvm -o - | FileCheck %s // RUN: %clang_cc1 %s -O0 -triple amdgcn---opencl -emit-llvm -o - | FileCheck %s -// CHECK: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" +// CHECK: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9-U0" void foo(void) {} diff --git a/lld/test/ELF/lto/amdgcn-oses.ll b/lld/test/ELF/lto/amdgcn-oses.ll index 7a74d0317f2b9e..5316ce6e3208f7 100644 --- a/lld/test/ELF/lto/amdgcn-oses.ll +++ b/lld/test/ELF/lto/amdgcn-oses.ll @@ -25,7 +25,7 @@ ;--- amdhsa.ll target triple = 
"amdgcn-amd-amdhsa" -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-U0" !llvm.module.flags = !{!0} !0 = !{i32 1, !"amdhsa_code_object_version", i32 500} @@ -36,7 +36,7 @@ define void @_start() { ;--- amdpal.ll target triple = "amdgcn-amd-amdpal" -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-U0" define amdgpu_cs void @_start() { ret void @@ -44,7 +44,7 @@ define amdgpu_cs void @_start() { ;--- mesa3d.ll target triple = "amdgcn-amd-mesa3d" -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-U0" define void @_start() { ret void diff --git a/lld/test/ELF/lto/amdgcn.ll b/lld/test/ELF/lto/amdgcn.ll index 4281e209fd9789..5ef32976251cb8 100644 --- a/lld/test/ELF/lto/amdgcn.ll +++ b/lld/test/ELF/lto/amdgcn.ll @@ -5,7 +5,7 @@ ; Make sure the amdgcn triple is handled target triple = "amdgcn-amd-amdhsa" -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-U0" define void @_start() { ret void diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index b08bd79147f7de..77b050bfb11a40 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -3076,6 +3076,12 @@ as follows: address space 0, this property only affects the default value to be used when creating globals without additional contextual information (e.g. in LLVM passes). +``U<address space>`` + Specifies the undesirable address space for a target. This specification does + not define a new address space; it must correspond to a valid, existing address + space and be capable of lossless conversion to other address spaces. If this + specification is absent, it indicates that the target does not have such an + address space. .. _alloca_addrspace: diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst new file mode 100644 index 00000000000000..b3244437d9bae4 --- /dev/null +++ b/llvm/docs/ReleaseNotes.rst @@ -0,0 +1,251 @@ +============================ +LLVM |release| Release Notes +============================ + +.. contents:: + :local: + +.. only:: PreRelease + + .. warning:: + These are in-progress notes for the upcoming LLVM |version| release. + Release notes for previous releases can be found on + `the Download Page <https://releases.llvm.org/download.html>`_. 
+ + +Introduction +============ + +This document contains the release notes for the LLVM Compiler Infrastructure, +release |release|. Here we describe the status of LLVM, including major improvements +from the previous release, improvements in various subprojects of LLVM, and +some of the current users of the code. All LLVM releases may be downloaded +from the `LLVM releases web site <https://llvm.org/releases/>`_. + +For more information about LLVM, including information about the latest +release, please check out the `main LLVM web site <https://llvm.org/>`_. If you +have questions or comments, the `Discourse forums +<https://discourse.llvm.org>`_ is a good place to ask +them. + +Note that if you are reading this file from a Git checkout or the main +LLVM web page, this document applies to the *next* release, not the current +one. To see the release notes for a specific release, please see the `releases +page <https://llvm.org/releases/>`_. + +Non-comprehensive list of changes in this release +================================================= +.. NOTE + For small 1-3 sentence descriptions, just add an entry at the end of + this list. If your description won't fit comfortably in one bullet + point (e.g. maybe you would like to give an example of the + functionality, or simply have a lot to talk about), see the `NOTE` below + for adding a new subsection. + +* ... + +Update on required toolchains to build LLVM +------------------------------------------- + +Changes to the LLVM IR +---------------------- + +* The ``x86_mmx`` IR type has been removed. It will be translated to + the standard vector type ``<1 x i64>`` in bitcode upgrade. +* Renamed ``llvm.experimental.stepvector`` intrinsic to ``llvm.stepvector``. + +* Added ``usub_cond`` and ``usub_sat`` operations to ``atomicrmw``. + +* Added ``U<address space>`` to data layout to represent undesirable address space + if a target has it. + +Changes to LLVM infrastructure +------------------------------ + +Changes to building LLVM +------------------------ + +Changes to TableGen +------------------- + +Changes to Interprocedural Optimizations +---------------------------------------- + +Changes to the AArch64 Backend +------------------------------ + +* `.balign N, 0`, `.p2align N, 0`, `.align N, 0` in code sections will now fill + the required alignment space with a sequence of `0x0` bytes (the requested + fill value) rather than NOPs. + +Changes to the AMDGPU Backend +----------------------------- + +* Removed ``llvm.amdgcn.flat.atomic.fadd`` and + ``llvm.amdgcn.global.atomic.fadd`` intrinsics. Users should use the + :ref:`atomicrmw <i_atomicrmw>` instruction with `fadd` and + addrspace(0) or addrspace(1) instead. + +Changes to the ARM Backend +-------------------------- + +* `.balign N, 0`, `.p2align N, 0`, `.align N, 0` in code sections will now fill + the required alignment space with a sequence of `0x0` bytes (the requested + fill value) rather than NOPs. 
+ +Changes to the AVR Backend +-------------------------- + +Changes to the DirectX Backend +------------------------------ + +Changes to the Hexagon Backend +------------------------------ + +Changes to the LoongArch Backend +-------------------------------- + +Changes to the MIPS Backend +--------------------------- + +Changes to the PowerPC Backend +------------------------------ + +Changes to the RISC-V Backend +----------------------------- + +* `.balign N, 0`, `.p2align N, 0`, `.align N, 0` in code sections will now fill + the required alignment space with a sequence of `0x0` bytes (the requested + fill value) rather than NOPs. +* Added Syntacore SCR4 and SCR5 CPUs: ``-mcpu=syntacore-scr4/5-rv32/64`` +* ``-mcpu=sifive-p470`` was added. +* Added Hazard3 CPU as taped out for RP2350: ``-mcpu=rp2350-hazard3`` (32-bit + only). +* Fixed length vector support using RVV instructions now requires VLEN>=64. This + means Zve32x and Zve32f will also require Zvl64b. The prior support was + largely untested. +* The ``Zvbc32e`` and ``Zvkgs`` extensions are now supported experimentally. +* Added ``Smctr`` and ``Ssctr`` extensions. +* ``-mcpu=syntacore-scr7`` was added. + +Changes to the WebAssembly Backend +---------------------------------- + +Changes to the Windows Target +----------------------------- + +Changes to the X86 Backend +-------------------------- + +* `.balign N, 0x90`, `.p2align N, 0x90`, and `.align N, 0x90` in code sections + now fill the required alignment space with repeating `0x90` bytes, rather than + using optimised NOP filling. Optimised NOP filling fills the space with NOP + instructions of various widths, not just those that use the `0x90` byte + encoding. To use optimised NOP filling in a code section, leave off the + "fillval" argument, i.e. `.balign N`, `.p2align N` or `.align N` respectively. + +* Due to the removal of the ``x86_mmx`` IR type, functions with + ``x86_mmx`` arguments or return values will use a different, + incompatible, calling convention ABI. Such functions are not + generally seen in the wild (Clang never generates them!), so this is + not expected to result in real-world compatibility problems. + +* Support ISA of ``AVX10.2-256`` and ``AVX10.2-512``. + +Changes to the OCaml bindings +----------------------------- + +Changes to the Python bindings +------------------------------ + +Changes to the C API +-------------------- + +* The following symbols are deleted due to the removal of the ``x86_mmx`` IR type: + + * ``LLVMX86_MMXTypeKind`` + * ``LLVMX86MMXTypeInContext`` + * ``LLVMX86MMXType`` + + * The following functions are added to further support non-null-terminated strings: + + * ``LLVMGetNamedFunctionWithLength`` + * ``LLVMGetNamedGlobalWithLength`` + +* The following functions are added to access the ``LLVMContextRef`` associated + with ``LLVMValueRef`` and ``LLVMBuilderRef`` objects: + + * ``LLVMGetValueContext`` + * ``LLVMGetBuilderContext`` + +* The new pass manager can now be invoked with a custom alias analysis pipeline, using + the ``LLVMPassBuilderOptionsSetAAPipeline`` function. + +* It is now also possible to run the new pass manager on a single function, by calling + ``LLVMRunPassesOnFunction`` instead of ``LLVMRunPasses``. + +* Support for creating instructions with custom synchronization scopes has been added: + + * ``LLVMGetSyncScopeID`` to map a synchronization scope name to an ID. 
+ * ``LLVMBuildFenceSyncScope``, ``LLVMBuildAtomicRMWSyncScope`` and + ``LLVMBuildAtomicCmpXchgSyncScope`` versions of the existing builder functions + with an additional synchronization scope ID parameter. + * ``LLVMGetAtomicSyncScopeID`` and ``LLVMSetAtomicSyncScopeID`` to get and set the + synchronization scope of any atomic instruction. + * ``LLVMIsAtomic`` to check if an instruction is atomic, for use with the above functions. + Because of backwards compatibility, ``LLVMIsAtomicSingleThread`` and + ``LLVMSetAtomicSingleThread`` continue to work with any instruction type. + +* The `LLVMSetPersonalityFn` and `LLVMSetInitializer` APIs now support clearing the + personality function and initializer respectively by passing a null pointer. + +* The following functions are added to allow iterating over debug records attached to + instructions: + + * ``LLVMGetFirstDbgRecord`` + * ``LLVMGetLastDbgRecord`` + * ``LLVMGetNextDbgRecord`` + * ``LLVMGetPreviousDbgRecord`` + + +Changes to the CodeGen infrastructure +------------------------------------- + +Changes to the Metadata Info +--------------------------------- + +Changes to the Debug Info +--------------------------------- + +Changes to the LLVM tools +--------------------------------- + +Changes to LLDB +--------------------------------- + +Changes to BOLT +--------------------------------- + +Changes to Sanitizers +--------------------- + +Other Changes +------------- + +External Open Source Projects Using LLVM 19 +=========================================== + +* A project... + +Additional Information +====================== + +A wide variety of additional information is available on the `LLVM web page +<https://llvm.org/>`_, in particular in the `documentation +<https://llvm.org/docs/>`_ section. The web page also contains versions of the +API documentation which is up-to-date with the Git version of the source +code. You can access versions of these documents specific to this release by +going into the ``llvm/docs/`` directory in the LLVM tree. + +If you have any questions or comments about LLVM, please feel free to contact +us via the `Discourse forums <https://discourse.llvm.org>`_. diff --git a/llvm/include/llvm/IR/DataLayout.h b/llvm/include/llvm/IR/DataLayout.h index 8f7ab2f9df389e..eb9c84d70d3150 100644 --- a/llvm/include/llvm/IR/DataLayout.h +++ b/llvm/include/llvm/IR/DataLayout.h @@ -94,6 +94,7 @@ class DataLayout { unsigned AllocaAddrSpace = 0; unsigned ProgramAddrSpace = 0; unsigned DefaultGlobalsAddrSpace = 0; + std::optional<unsigned> UndesirableAddressSpace; MaybeAlign StackNaturalAlign; MaybeAlign FunctionPtrAlign; @@ -245,6 +246,9 @@ class DataLayout { unsigned getDefaultGlobalsAddressSpace() const { return DefaultGlobalsAddrSpace; } + std::optional<unsigned> getUndesirableAddressSpace() const { + return UndesirableAddressSpace; + } bool hasMicrosoftFastStdCallMangling() const { return ManglingMode == MM_WinCOFFX86; diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index 477b77a6dd5335..a1584125016a7a 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -5574,6 +5574,10 @@ std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) { if (!DL.contains("-p9") && !DL.starts_with("p9")) Res.append("-p9:192:256:256:32"); + // Add flat address space. 
+ if (!DL.contains("-U0") && !DL.starts_with("U0")) + Res.append("-U0"); + return Res; } @@ -5636,6 +5640,12 @@ std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) { Res = (Ref.take_front(I) + "-f80:128-" + Ref.drop_front(I + 8)).str(); } + if (T.isNVPTX()) { + // Add flat address space. + if (!DL.contains("-U0") && !DL.starts_with("U0")) + Res.append("-U0"); + } + return Res; } diff --git a/llvm/lib/IR/DataLayout.cpp b/llvm/lib/IR/DataLayout.cpp index d295d1f5785eb9..1fbd6a909870c4 100644 --- a/llvm/lib/IR/DataLayout.cpp +++ b/llvm/lib/IR/DataLayout.cpp @@ -228,6 +228,7 @@ DataLayout &DataLayout::operator=(const DataLayout &Other) { AllocaAddrSpace = Other.AllocaAddrSpace; ProgramAddrSpace = Other.ProgramAddrSpace; DefaultGlobalsAddrSpace = Other.DefaultGlobalsAddrSpace; + UndesirableAddressSpace = Other.UndesirableAddressSpace; StackNaturalAlign = Other.StackNaturalAlign; FunctionPtrAlign = Other.FunctionPtrAlign; TheFunctionPtrAlignType = Other.TheFunctionPtrAlignType; @@ -250,6 +251,7 @@ bool DataLayout::operator==(const DataLayout &Other) const { AllocaAddrSpace == Other.AllocaAddrSpace && ProgramAddrSpace == Other.ProgramAddrSpace && DefaultGlobalsAddrSpace == Other.DefaultGlobalsAddrSpace && + UndesirableAddressSpace == Other.UndesirableAddressSpace && StackNaturalAlign == Other.StackNaturalAlign && FunctionPtrAlign == Other.FunctionPtrAlign && TheFunctionPtrAlignType == Other.TheFunctionPtrAlignType && @@ -568,6 +570,15 @@ Error DataLayout::parseSpecification(StringRef Spec) { return Err; break; } + case 'U': { // Flat address space. + if (Rest.empty()) + return createSpecFormatError("U<address space>"); + unsigned AS = ~0U; + if (Error Err = parseAddrSpace(Rest, AS)) + return Err; + UndesirableAddressSpace.emplace(AS); + break; + } case 'm': if (!Rest.consume_front(":") || Rest.empty()) return createSpecFormatError("m:<mangling>"); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 16e23879cd735c..7b45c56d5c0764 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -627,7 +627,7 @@ static StringRef computeDataLayout(const Triple &TT) { "-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-" "v32:32-v48:64-v96:" "128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-" - "G1-ni:7:8:9"; + "G1-ni:7:8:9-U0"; } LLVM_READNONE diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp index 7d04cf3dc51e67..52f6efb1a5e92b 100644 --- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -117,7 +117,7 @@ static std::string computeDataLayout(bool is64Bit, bool UseShortPointers) { else if (UseShortPointers) Ret += "-p3:32:32-p4:32:32-p5:32:32"; - Ret += "-i64:64-i128:128-v16:16-v32:32-n16:32:64"; + Ret += "-i64:64-i128:128-v16:16-v32:32-n16:32:64-U0"; return Ret; } diff --git a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp index e5384b2eb2c2c1..8f7f0ffb1f3597 100644 --- a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp @@ -55,13 +55,13 @@ static std::string computeDataLayout(const Triple &TT) { // mean anything. 
if (Arch == Triple::spirv32) return "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-" - "v96:128-v192:256-v256:256-v512:512-v1024:1024-G1"; + "v96:128-v192:256-v256:256-v512:512-v1024:1024-G1-U4"; if (TT.getVendor() == Triple::VendorType::AMD && TT.getOS() == Triple::OSType::AMDHSA) return "e-i64:64-v16:16-v24:32-v32:32-v48:64-" - "v96:128-v192:256-v256:256-v512:512-v1024:1024-G1-P4-A0"; + "v96:128-v192:256-v256:256-v512:512-v1024:1024-G1-P4-A0-U4"; return "e-i64:64-v16:16-v24:32-v32:32-v48:64-" - "v96:128-v192:256-v256:256-v512:512-v1024:1024-G1"; + "v96:128-v192:256-v256:256-v512:512-v1024:1024-G1-U4"; } static Reloc::Model getEffectiveRelocModel(std::optional<Reloc::Model> RM) { diff --git a/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp b/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp index 1cd4a47c75739b..d699b7a216fde3 100644 --- a/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp +++ b/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp @@ -36,12 +36,12 @@ TEST(DataLayoutUpgradeTest, ValidDataLayoutUpgrade) { // Check that AMDGPU targets add -G1 if it's not present. EXPECT_EQ(UpgradeDataLayoutString("e-p:32:32", "r600"), "e-p:32:32-G1"); // and that ANDGCN adds p7 and p8 as well. - EXPECT_EQ( - UpgradeDataLayoutString("e-p:64:64", "amdgcn"), - "e-p:64:64-G1-ni:7:8:9-p7:160:256:256:32-p8:128:128-p9:192:256:256:32"); - EXPECT_EQ( - UpgradeDataLayoutString("e-p:64:64-G1", "amdgcn"), - "e-p:64:64-G1-ni:7:8:9-p7:160:256:256:32-p8:128:128-p9:192:256:256:32"); + EXPECT_EQ(UpgradeDataLayoutString("e-p:64:64", "amdgcn"), + "e-p:64:64-G1-ni:7:8:9-p7:160:256:256:32-p8:128:128-p9:192:256:256:" + "32-U0"); + EXPECT_EQ(UpgradeDataLayoutString("e-p:64:64-G1", "amdgcn"), + "e-p:64:64-G1-ni:7:8:9-p7:160:256:256:32-p8:128:128-p9:192:256:256:" + "32-U0"); // but that r600 does not. EXPECT_EQ(UpgradeDataLayoutString("e-p:32:32-G1", "r600"), "e-p:32:32-G1"); @@ -56,7 +56,7 @@ TEST(DataLayoutUpgradeTest, ValidDataLayoutUpgrade) { "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-" "v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:" "1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9-p7:160:256:256:32-p8:128:128-" - "p9:192:256:256:32"); + "p9:192:256:256:32-U0"); // Check that RISCV64 upgrades -n64 to -n32:64. EXPECT_EQ(UpgradeDataLayoutString("e-m:e-p:64:64-i64:64-i128:128-n64-S128", @@ -106,23 +106,23 @@ TEST(DataLayoutUpgradeTest, NoDataLayoutUpgrade) { // Check that AMDGPU targets don't add -G1 if there is already a -G flag. EXPECT_EQ(UpgradeDataLayoutString("e-p:32:32-G2", "r600"), "e-p:32:32-G2"); EXPECT_EQ(UpgradeDataLayoutString("G2", "r600"), "G2"); - EXPECT_EQ( - UpgradeDataLayoutString("e-p:64:64-G2", "amdgcn"), - "e-p:64:64-G2-ni:7:8:9-p7:160:256:256:32-p8:128:128-p9:192:256:256:32"); - EXPECT_EQ( - UpgradeDataLayoutString("G2-e-p:64:64", "amdgcn"), - "G2-e-p:64:64-ni:7:8:9-p7:160:256:256:32-p8:128:128-p9:192:256:256:32"); - EXPECT_EQ( - UpgradeDataLayoutString("e-p:64:64-G0", "amdgcn"), - "e-p:64:64-G0-ni:7:8:9-p7:160:256:256:32-p8:128:128-p9:192:256:256:32"); + EXPECT_EQ(UpgradeDataLayoutString("e-p:64:64-G2", "amdgcn"), + "e-p:64:64-G2-ni:7:8:9-p7:160:256:256:32-p8:128:128-p9:192:256:256:" + "32-U0"); + EXPECT_EQ(UpgradeDataLayoutString("G2-e-p:64:64", "amdgcn"), + "G2-e-p:64:64-ni:7:8:9-p7:160:256:256:32-p8:128:128-p9:192:256:256:" + "32-U0"); + EXPECT_EQ(UpgradeDataLayoutString("e-p:64:64-G0", "amdgcn"), + "e-p:64:64-G0-ni:7:8:9-p7:160:256:256:32-p8:128:128-p9:192:256:256:" + "32-U0"); // Check that AMDGCN targets don't add already declared address space 7. 
EXPECT_EQ(UpgradeDataLayoutString("e-p:64:64-p7:64:64", "amdgcn"), - "e-p:64:64-p7:64:64-G1-ni:7:8:9-p8:128:128-p9:192:256:256:32"); + "e-p:64:64-p7:64:64-G1-ni:7:8:9-p8:128:128-p9:192:256:256:32-U0"); EXPECT_EQ(UpgradeDataLayoutString("p7:64:64-G2-e-p:64:64", "amdgcn"), - "p7:64:64-G2-e-p:64:64-ni:7:8:9-p8:128:128-p9:192:256:256:32"); + "p7:64:64-G2-e-p:64:64-ni:7:8:9-p8:128:128-p9:192:256:256:32-U0"); EXPECT_EQ(UpgradeDataLayoutString("e-p:64:64-p7:64:64-G1", "amdgcn"), - "e-p:64:64-p7:64:64-G1-ni:7:8:9-p8:128:128-p9:192:256:256:32"); + "e-p:64:64-p7:64:64-G1-ni:7:8:9-p8:128:128-p9:192:256:256:32-U0"); // Check that SPIR & SPIRV targets don't add -G1 if there is already a -G // flag. @@ -146,7 +146,7 @@ TEST(DataLayoutUpgradeTest, EmptyDataLayout) { // Check that AMDGPU targets add G1 if it's not present. EXPECT_EQ(UpgradeDataLayoutString("", "r600"), "G1"); EXPECT_EQ(UpgradeDataLayoutString("", "amdgcn"), - "G1-ni:7:8:9-p7:160:256:256:32-p8:128:128-p9:192:256:256:32"); + "G1-ni:7:8:9-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-U0"); // Check that SPIR & SPIRV targets add G1 if it's not present. EXPECT_EQ(UpgradeDataLayoutString("", "spir"), "G1"); diff --git a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp index e253037e0edcea..4183c4731815a4 100644 --- a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp +++ b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp @@ -81,7 +81,7 @@ static constexpr StringLiteral amdgcnDataLayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32" "-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:" "32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:" - "64-S32-A5-G1-ni:7:8:9"; + "64-S32-A5-G1-ni:7:8:9-U0"; namespace { struct GPULaneIdOpToROCDL : ConvertOpToLLVMPattern<gpu::LaneIdOp> { _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
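Not part of the patch: a small standalone sketch of the bitcode auto-upgrade behavior exercised by `DataLayoutUpgradeTest.cpp` above. It assumes the patch is applied; the expected string is copied directly from that unit test.

  #include "llvm/IR/AutoUpgrade.h"
  #include <cassert>
  #include <string>

  int main() {
    // Older amdgcn bitcode without the new component gets "-U0" appended,
    // alongside the existing G1/ni/p7/p8/p9 upgrades.
    std::string Upgraded =
        llvm::UpgradeDataLayoutString("e-p:64:64", "amdgcn");
    assert(Upgraded ==
           "e-p:64:64-G1-ni:7:8:9-p7:160:256:256:32-p8:128:128-p9:192:256:256:"
           "32-U0");
    return 0;
  }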