https://github.com/shiltian updated https://github.com/llvm/llvm-project/pull/108713
>From 0518fe9d148b371a1a148032f3738fb2fe4fd927 Mon Sep 17 00:00:00 2001 From: Shilei Tian <i...@tianshilei.me> Date: Sun, 15 Sep 2024 21:53:50 -0400 Subject: [PATCH 1/3] [IR] Introduce `T<address space>` to `DataLayout` to represent flat address space if a target supports it --- clang/lib/Basic/Targets/AMDGPU.cpp | 2 +- clang/lib/Basic/Targets/NVPTX.cpp | 8 ++-- clang/test/CodeGen/target-data.c | 8 ++-- clang/test/CodeGenOpenCL/amdgpu-env-amdgcn.cl | 2 +- lld/test/ELF/lto/amdgcn-oses.ll | 6 +-- lld/test/ELF/lto/amdgcn.ll | 2 +- llvm/docs/LangRef.rst | 47 ++++++++++++------- llvm/docs/ReleaseNotes.rst | 3 ++ llvm/include/llvm/IR/DataLayout.h | 2 + llvm/lib/IR/AutoUpgrade.cpp | 10 ++++ llvm/lib/IR/DataLayout.cpp | 9 ++++ .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 2 +- llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp | 2 +- .../Bitcode/DataLayoutUpgradeTest.cpp | 40 ++++++++-------- .../GPUToROCDL/LowerGpuOpsToROCDLOps.cpp | 2 +- 15 files changed, 91 insertions(+), 54 deletions(-) diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp index 3b748d0249d57b..0ee56848a6cb98 100644 --- a/clang/lib/Basic/Targets/AMDGPU.cpp +++ b/clang/lib/Basic/Targets/AMDGPU.cpp @@ -36,7 +36,7 @@ static const char *const DataLayoutStringAMDGCN = "-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:" "32-v48:64-v96:128" "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1" - "-ni:7:8:9"; + "-ni:7:8:9-T0"; const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = { llvm::AMDGPUAS::FLAT_ADDRESS, // Default diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp index 43b653dc52ce0d..59344c3c71aee2 100644 --- a/clang/lib/Basic/Targets/NVPTX.cpp +++ b/clang/lib/Basic/Targets/NVPTX.cpp @@ -66,12 +66,12 @@ NVPTXTargetInfo::NVPTXTargetInfo(const llvm::Triple &Triple, HasFloat16 = true; if (TargetPointerWidth == 32) - resetDataLayout("e-p:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64"); + 
resetDataLayout("e-p:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64-T0"); else if (Opts.NVPTXUseShortPointers) - resetDataLayout( - "e-p3:32:32-p4:32:32-p5:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64"); + resetDataLayout("e-p3:32:32-p4:32:32-p5:32:32-i64:64-i128:128-v16:16-v32:" + "32-n16:32:64-T0"); else - resetDataLayout("e-i64:64-i128:128-v16:16-v32:32-n16:32:64"); + resetDataLayout("e-i64:64-i128:128-v16:16-v32:32-n16:32:64-T0"); // If possible, get a TargetInfo for our host triple, so we can match its // types. diff --git a/clang/test/CodeGen/target-data.c b/clang/test/CodeGen/target-data.c index 41cbd5a0219d5e..a3d1a8cb9ee234 100644 --- a/clang/test/CodeGen/target-data.c +++ b/clang/test/CodeGen/target-data.c @@ -160,11 +160,11 @@ // RUN: %clang_cc1 -triple nvptx-unknown -o - -emit-llvm %s | \ // RUN: FileCheck %s -check-prefix=NVPTX -// NVPTX: target datalayout = "e-p:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64" +// NVPTX: target datalayout = "e-p:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64-T0" // RUN: %clang_cc1 -triple nvptx64-unknown -o - -emit-llvm %s | \ // RUN: FileCheck %s -check-prefix=NVPTX64 -// NVPTX64: target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64" +// NVPTX64: target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64-T0" // RUN: %clang_cc1 -triple r600-unknown -o - -emit-llvm %s | \ // RUN: FileCheck %s -check-prefix=R600 @@ -176,12 +176,12 @@ // RUN: %clang_cc1 -triple amdgcn-unknown -target-cpu hawaii -o - -emit-llvm %s \ // RUN: | FileCheck %s -check-prefix=R600SI -// R600SI: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" +// R600SI: target datalayout = 
"e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9-T0" // Test default -target-cpu // RUN: %clang_cc1 -triple amdgcn-unknown -o - -emit-llvm %s \ // RUN: | FileCheck %s -check-prefix=R600SIDefault -// R600SIDefault: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" +// R600SIDefault: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9-T0" // RUN: %clang_cc1 -triple arm64-unknown -o - -emit-llvm %s | \ // RUN: FileCheck %s -check-prefix=AARCH64 diff --git a/clang/test/CodeGenOpenCL/amdgpu-env-amdgcn.cl b/clang/test/CodeGenOpenCL/amdgpu-env-amdgcn.cl index bb52f87615214b..b4b246cc082e00 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-env-amdgcn.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-env-amdgcn.cl @@ -1,5 +1,5 @@ // RUN: %clang_cc1 %s -O0 -triple amdgcn -emit-llvm -o - | FileCheck %s // RUN: %clang_cc1 %s -O0 -triple amdgcn---opencl -emit-llvm -o - | FileCheck %s -// CHECK: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" +// CHECK: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9-T0" void foo(void) {} diff 
--git a/lld/test/ELF/lto/amdgcn-oses.ll b/lld/test/ELF/lto/amdgcn-oses.ll index 7a74d0317f2b9e..8903b45565b41f 100644 --- a/lld/test/ELF/lto/amdgcn-oses.ll +++ b/lld/test/ELF/lto/amdgcn-oses.ll @@ -25,7 +25,7 @@ ;--- amdhsa.ll target triple = "amdgcn-amd-amdhsa" -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-T0" !llvm.module.flags = !{!0} !0 = !{i32 1, !"amdhsa_code_object_version", i32 500} @@ -36,7 +36,7 @@ define void @_start() { ;--- amdpal.ll target triple = "amdgcn-amd-amdpal" -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-T0" define amdgpu_cs void @_start() { ret void @@ -44,7 +44,7 @@ define amdgpu_cs void @_start() { ;--- mesa3d.ll target triple = "amdgcn-amd-mesa3d" -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-T0" define void @_start() { ret void diff --git a/lld/test/ELF/lto/amdgcn.ll b/lld/test/ELF/lto/amdgcn.ll index 4281e209fd9789..bcfbc272afdf9a 100644 --- a/lld/test/ELF/lto/amdgcn.ll +++ b/lld/test/ELF/lto/amdgcn.ll @@ -5,7 +5,7 @@ ; Make sure the 
amdgcn triple is handled target triple = "amdgcn-amd-amdhsa" -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-T0" define void @_start() { ret void diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 144b4497ca63ce..dc9b6b9c9ccbf8 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -354,7 +354,7 @@ added in the future: not be used lightly but only for specific situations such as an alternative to the *register pinning* performance technique often used when implementing functional programming languages. At the - moment only X86, AArch64, and RISCV support this convention. The + moment only X86, AArch64, and RISCV support this convention. The following limitations exist: - On *X86-32* only up to 4 bit type parameters are supported. No @@ -685,10 +685,10 @@ implementation defined, the optimizer can't do the latter. The former is challenging as many commonly expected properties, such as ``ptrtoint(v)-ptrtoint(v) == 0``, don't hold for non-integral types. Similar restrictions apply to intrinsics that might examine the pointer bits, -such as :ref:`llvm.ptrmask<int_ptrmask>`. +such as :ref:`llvm.ptrmask<int_ptrmask>`. The alignment information provided by the frontend for a non-integral pointer -(typically using attributes or metadata) must be valid for every possible +(typically using attributes or metadata) must be valid for every possible representation of the pointer. .. _globalvars: @@ -1677,10 +1677,10 @@ Currently, only the following parameter attributes are defined: - The range is allowed to wrap. - The empty range is represented using ``0,0``. - Otherwise, ``a`` and ``b`` are not allowed to be equal. 
- - This attribute may only be applied to parameters or return values with integer + + This attribute may only be applied to parameters or return values with integer or vector of integer types. - + For vector-typed parameters, the range is applied element-wise. .. _gc: @@ -3050,6 +3050,19 @@ as follows: address space 0, this property only affects the default value to be used when creating globals without additional contextual information (e.g. in LLVM passes). +``T<address space>`` + Specifies the address space for a target's 'flat' address space. Note this + is not necessarily the same as addrspace 0, which LLVM sometimes refers to + as the generic address space. The flat address space is a generic address + space that can be used to access multiple segments of memory with different + address spaces. Access of a memory location through a pointer with this + address space is expected to be legal but slower compared to the same memory + location accessed through a pointer with a different address space. This is + for targets with different pointer representations which can be converted + with the addrspacecast instruction. If a pointer is converted to this + address space, optimizations should attempt to replace the access with the + source address space. The absence of this specification indicates the target + does not have such a flat address space to optimize away. .. _alloca_addrspace: @@ -14346,7 +14359,7 @@ Arguments: """""""""" The first 4 arguments are similar to ``llvm.instrprof.increment``. The indexing is specific to callsites, meaning callsites are indexed from 0, independent from -the indexes used by the other intrinsics (such as +the indexes used by the other intrinsics (such as ``llvm.instrprof.increment[.step]``). The last argument is the called value of the callsite this intrinsic precedes. @@ -14360,7 +14373,7 @@ a buffer LLVM can use to perform counter increments (i.e. the lowering of ``llvm.instrprof.increment[.step]``. 
The address range following the counter buffer, ``<num-counters>`` x ``sizeof(ptr)`` - sized, is expected to contain pointers to contexts of functions called from this function ("subcontexts"). -LLVM does not dereference into that memory region, just calculates GEPs. +LLVM does not dereference into that memory region, just calculates GEPs. The lowering of ``llvm.instrprof.callsite`` consists of: @@ -14929,8 +14942,8 @@ integer bit width or any vector of integer elements. Overview: """"""""" -Return ``-1`` if ``%a`` is signed less than ``%b``, ``0`` if they are equal, and -``1`` if ``%a`` is signed greater than ``%b``. Vector intrinsics operate on a per-element basis. +Return ``-1`` if ``%a`` is signed less than ``%b``, ``0`` if they are equal, and +``1`` if ``%a`` is signed greater than ``%b``. Vector intrinsics operate on a per-element basis. Arguments: """""""""" @@ -14958,8 +14971,8 @@ integer bit width or any vector of integer elements. Overview: """"""""" -Return ``-1`` if ``%a`` is unsigned less than ``%b``, ``0`` if they are equal, and -``1`` if ``%a`` is unsigned greater than ``%b``. Vector intrinsics operate on a per-element basis. +Return ``-1`` if ``%a`` is unsigned less than ``%b``, ``0`` if they are equal, and +``1`` if ``%a`` is unsigned greater than ``%b``. Vector intrinsics operate on a per-element basis. Arguments: """""""""" @@ -21556,9 +21569,9 @@ Semantics: """""""""" The '``llvm.vp.minimum``' intrinsic performs floating-point minimum (:ref:`minimum <i_minimum>`) -of the first and second vector arguments on each enabled lane, the result being +of the first and second vector arguments on each enabled lane, the result being NaN if either argument is a NaN. -0.0 is considered to be less than +0.0 for this -intrinsic. The result on disabled lanes is a :ref:`poison value <poisonvalues>`. +intrinsic. The result on disabled lanes is a :ref:`poison value <poisonvalues>`. The operation is performed in the default floating-point environment. 
Examples: @@ -29191,7 +29204,7 @@ Semantics: """""""""" The intrinsic ``@llvm.allow.ubsan.check()`` returns either ``true`` or -``false``, depending on compiler options. +``false``, depending on compiler options. For each evaluation of a call to this intrinsic, the program must be valid and correct both if it returns ``true`` and if it returns ``false``. @@ -29250,13 +29263,13 @@ Semantics: """""""""" The intrinsic ``@llvm.allow.runtime.check()`` returns either ``true`` or -``false``, depending on compiler options. +``false``, depending on compiler options. For each evaluation of a call to this intrinsic, the program must be valid and correct both if it returns ``true`` and if it returns ``false``. When used in a branch condition, it allows us to choose between -two alternative correct solutions for the same problem. +two alternative correct solutions for the same problem. If the intrinsic is evaluated as ``true``, program should execute a guarded check. If the intrinsic is evaluated as ``false``, the program should avoid any diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index 52456896f2fc6c..c4edbc6daff774 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -56,6 +56,9 @@ Changes to the LLVM IR * Added ``usub_cond`` and ``usub_sat`` operations to ``atomicrmw``. +* Added ``T<address space>`` to data layout to represent flat address space if a + target has it. 
+ Changes to LLVM infrastructure ------------------------------ diff --git a/llvm/include/llvm/IR/DataLayout.h b/llvm/include/llvm/IR/DataLayout.h index 8f7ab2f9df389e..5ee388a5d059dd 100644 --- a/llvm/include/llvm/IR/DataLayout.h +++ b/llvm/include/llvm/IR/DataLayout.h @@ -94,6 +94,7 @@ class DataLayout { unsigned AllocaAddrSpace = 0; unsigned ProgramAddrSpace = 0; unsigned DefaultGlobalsAddrSpace = 0; + unsigned FlatAddressSpace = ~0U; MaybeAlign StackNaturalAlign; MaybeAlign FunctionPtrAlign; @@ -245,6 +246,7 @@ class DataLayout { unsigned getDefaultGlobalsAddressSpace() const { return DefaultGlobalsAddrSpace; } + unsigned getFlatAddressSpace() const { return FlatAddressSpace; } bool hasMicrosoftFastStdCallMangling() const { return ManglingMode == MM_WinCOFFX86; diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index 69dae5e32dbbe8..a08700d4585985 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -5451,6 +5451,10 @@ std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) { if (!DL.contains("-p9") && !DL.starts_with("p9")) Res.append("-p9:192:256:256:32"); + // Add flat address space. + if (!DL.contains("-T0") && !DL.starts_with("T0")) + Res.append("-T0"); + return Res; } @@ -5501,6 +5505,12 @@ std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) { Res = (Ref.take_front(I) + "-f80:128-" + Ref.drop_front(I + 8)).str(); } + if (T.isNVPTX()) { + // Add flat address space. 
+ if (!DL.contains("-T0") && !DL.starts_with("T0")) + Res.append("-T0"); + } + return Res; } diff --git a/llvm/lib/IR/DataLayout.cpp b/llvm/lib/IR/DataLayout.cpp index d295d1f5785eb9..51396241515647 100644 --- a/llvm/lib/IR/DataLayout.cpp +++ b/llvm/lib/IR/DataLayout.cpp @@ -228,6 +228,7 @@ DataLayout &DataLayout::operator=(const DataLayout &Other) { AllocaAddrSpace = Other.AllocaAddrSpace; ProgramAddrSpace = Other.ProgramAddrSpace; DefaultGlobalsAddrSpace = Other.DefaultGlobalsAddrSpace; + FlatAddressSpace = Other.FlatAddressSpace; StackNaturalAlign = Other.StackNaturalAlign; FunctionPtrAlign = Other.FunctionPtrAlign; TheFunctionPtrAlignType = Other.TheFunctionPtrAlignType; @@ -250,6 +251,7 @@ bool DataLayout::operator==(const DataLayout &Other) const { AllocaAddrSpace == Other.AllocaAddrSpace && ProgramAddrSpace == Other.ProgramAddrSpace && DefaultGlobalsAddrSpace == Other.DefaultGlobalsAddrSpace && + FlatAddressSpace == Other.FlatAddressSpace && StackNaturalAlign == Other.StackNaturalAlign && FunctionPtrAlign == Other.FunctionPtrAlign && TheFunctionPtrAlignType == Other.TheFunctionPtrAlignType && @@ -568,6 +570,13 @@ Error DataLayout::parseSpecification(StringRef Spec) { return Err; break; } + case 'T': { // Flat address space. 
+ if (Rest.empty()) + return createSpecFormatError("T<address space>"); + if (Error Err = parseAddrSpace(Rest, FlatAddressSpace)) + return Err; + break; + } case 'm': if (!Rest.consume_front(":") || Rest.empty()) return createSpecFormatError("m:<mangling>"); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index f860b139945122..2e26cb969206f5 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -579,7 +579,7 @@ static StringRef computeDataLayout(const Triple &TT) { "-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-" "v32:32-v48:64-v96:" "128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-" - "G1-ni:7:8:9"; + "G1-ni:7:8:9-T0"; } LLVM_READNONE diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp index 57b7fa783c14a7..eb7f29655e6441 100644 --- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -117,7 +117,7 @@ static std::string computeDataLayout(bool is64Bit, bool UseShortPointers) { else if (UseShortPointers) Ret += "-p3:32:32-p4:32:32-p5:32:32"; - Ret += "-i64:64-i128:128-v16:16-v32:32-n16:32:64"; + Ret += "-i64:64-i128:128-v16:16-v32:32-n16:32:64-T0"; return Ret; } diff --git a/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp b/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp index ca50187e5e5ee0..85029297a72df2 100644 --- a/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp +++ b/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp @@ -36,12 +36,12 @@ TEST(DataLayoutUpgradeTest, ValidDataLayoutUpgrade) { // Check that AMDGPU targets add -G1 if it's not present. EXPECT_EQ(UpgradeDataLayoutString("e-p:32:32", "r600"), "e-p:32:32-G1"); // and that ANDGCN adds p7 and p8 as well. 
- EXPECT_EQ( - UpgradeDataLayoutString("e-p:64:64", "amdgcn"), - "e-p:64:64-G1-ni:7:8:9-p7:160:256:256:32-p8:128:128-p9:192:256:256:32"); - EXPECT_EQ( - UpgradeDataLayoutString("e-p:64:64-G1", "amdgcn"), - "e-p:64:64-G1-ni:7:8:9-p7:160:256:256:32-p8:128:128-p9:192:256:256:32"); + EXPECT_EQ(UpgradeDataLayoutString("e-p:64:64", "amdgcn"), + "e-p:64:64-G1-ni:7:8:9-p7:160:256:256:32-p8:128:128-p9:192:256:256:" + "32-T0"); + EXPECT_EQ(UpgradeDataLayoutString("e-p:64:64-G1", "amdgcn"), + "e-p:64:64-G1-ni:7:8:9-p7:160:256:256:32-p8:128:128-p9:192:256:256:" + "32-T0"); // but that r600 does not. EXPECT_EQ(UpgradeDataLayoutString("e-p:32:32-G1", "r600"), "e-p:32:32-G1"); @@ -56,7 +56,7 @@ TEST(DataLayoutUpgradeTest, ValidDataLayoutUpgrade) { "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-" "v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:" "1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9-p7:160:256:256:32-p8:128:128-" - "p9:192:256:256:32"); + "p9:192:256:256:32-T0"); // Check that RISCV64 upgrades -n64 to -n32:64. EXPECT_EQ(UpgradeDataLayoutString("e-m:e-p:64:64-i64:64-i128:128-n64-S128", @@ -99,23 +99,23 @@ TEST(DataLayoutUpgradeTest, NoDataLayoutUpgrade) { // Check that AMDGPU targets don't add -G1 if there is already a -G flag. 
EXPECT_EQ(UpgradeDataLayoutString("e-p:32:32-G2", "r600"), "e-p:32:32-G2"); EXPECT_EQ(UpgradeDataLayoutString("G2", "r600"), "G2"); - EXPECT_EQ( - UpgradeDataLayoutString("e-p:64:64-G2", "amdgcn"), - "e-p:64:64-G2-ni:7:8:9-p7:160:256:256:32-p8:128:128-p9:192:256:256:32"); - EXPECT_EQ( - UpgradeDataLayoutString("G2-e-p:64:64", "amdgcn"), - "G2-e-p:64:64-ni:7:8:9-p7:160:256:256:32-p8:128:128-p9:192:256:256:32"); - EXPECT_EQ( - UpgradeDataLayoutString("e-p:64:64-G0", "amdgcn"), - "e-p:64:64-G0-ni:7:8:9-p7:160:256:256:32-p8:128:128-p9:192:256:256:32"); + EXPECT_EQ(UpgradeDataLayoutString("e-p:64:64-G2", "amdgcn"), + "e-p:64:64-G2-ni:7:8:9-p7:160:256:256:32-p8:128:128-p9:192:256:256:" + "32-T0"); + EXPECT_EQ(UpgradeDataLayoutString("G2-e-p:64:64", "amdgcn"), + "G2-e-p:64:64-ni:7:8:9-p7:160:256:256:32-p8:128:128-p9:192:256:256:" + "32-T0"); + EXPECT_EQ(UpgradeDataLayoutString("e-p:64:64-G0", "amdgcn"), + "e-p:64:64-G0-ni:7:8:9-p7:160:256:256:32-p8:128:128-p9:192:256:256:" + "32-T0"); // Check that AMDGCN targets don't add already declared address space 7. EXPECT_EQ(UpgradeDataLayoutString("e-p:64:64-p7:64:64", "amdgcn"), - "e-p:64:64-p7:64:64-G1-ni:7:8:9-p8:128:128-p9:192:256:256:32"); + "e-p:64:64-p7:64:64-G1-ni:7:8:9-p8:128:128-p9:192:256:256:32-T0"); EXPECT_EQ(UpgradeDataLayoutString("p7:64:64-G2-e-p:64:64", "amdgcn"), - "p7:64:64-G2-e-p:64:64-ni:7:8:9-p8:128:128-p9:192:256:256:32"); + "p7:64:64-G2-e-p:64:64-ni:7:8:9-p8:128:128-p9:192:256:256:32-T0"); EXPECT_EQ(UpgradeDataLayoutString("e-p:64:64-p7:64:64-G1", "amdgcn"), - "e-p:64:64-p7:64:64-G1-ni:7:8:9-p8:128:128-p9:192:256:256:32"); + "e-p:64:64-p7:64:64-G1-ni:7:8:9-p8:128:128-p9:192:256:256:32-T0"); // Check that SPIR & SPIRV targets don't add -G1 if there is already a -G // flag. @@ -139,7 +139,7 @@ TEST(DataLayoutUpgradeTest, EmptyDataLayout) { // Check that AMDGPU targets add G1 if it's not present. 
EXPECT_EQ(UpgradeDataLayoutString("", "r600"), "G1"); EXPECT_EQ(UpgradeDataLayoutString("", "amdgcn"), - "G1-ni:7:8:9-p7:160:256:256:32-p8:128:128-p9:192:256:256:32"); + "G1-ni:7:8:9-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-T0"); // Check that SPIR & SPIRV targets add G1 if it's not present. EXPECT_EQ(UpgradeDataLayoutString("", "spir"), "G1"); diff --git a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp index fc3e1fc4f9d0c9..e6e3d180e1d29f 100644 --- a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp +++ b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp @@ -81,7 +81,7 @@ static constexpr StringLiteral amdgcnDataLayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32" "-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:" "32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:" - "64-S32-A5-G1-ni:7:8:9"; + "64-S32-A5-G1-ni:7:8:9-T0"; namespace { struct GPULaneIdOpToROCDL : ConvertOpToLLVMPattern<gpu::LaneIdOp> { >From 8db0d52053b0cde8be13ddb6c669b6b262eefdf8 Mon Sep 17 00:00:00 2001 From: Shilei Tian <i...@tianshilei.me> Date: Sun, 15 Sep 2024 23:06:14 -0400 Subject: [PATCH 2/3] [TargetTransformInfo] Remove `getFlatAddressSpace` This has been moved to `DataLayout`. 
--- .../llvm/Analysis/TargetTransformInfo.h | 21 ------------------- .../llvm/Analysis/TargetTransformInfoImpl.h | 2 -- llvm/include/llvm/CodeGen/BasicTTIImpl.h | 5 ----- llvm/lib/Analysis/TargetTransformInfo.cpp | 4 ---- .../Target/AMDGPU/AMDGPUTargetTransformInfo.h | 8 ------- .../Target/NVPTX/NVPTXTargetTransformInfo.h | 4 ---- .../Transforms/Scalar/InferAddressSpaces.cpp | 2 +- 7 files changed, 1 insertion(+), 45 deletions(-) diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index b2124c6106198e..e5986225b6fc32 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -451,24 +451,6 @@ class TargetTransformInfo { /// Return false if a \p AS0 address cannot possibly alias a \p AS1 address. bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const; - /// Returns the address space ID for a target's 'flat' address space. Note - /// this is not necessarily the same as addrspace(0), which LLVM sometimes - /// refers to as the generic address space. The flat address space is a - /// generic address space that can be used access multiple segments of memory - /// with different address spaces. Access of a memory location through a - /// pointer with this address space is expected to be legal but slower - /// compared to the same memory location accessed through a pointer with a - /// different address space. - // - /// This is for targets with different pointer representations which can - /// be converted with the addrspacecast instruction. If a pointer is converted - /// to this address space, optimizations should attempt to replace the access - /// with the source address space. - /// - /// \returns ~0u if the target does not have such a flat address space to - /// optimize away. - unsigned getFlatAddressSpace() const; - /// Return any intrinsic address operand indexes which may be rewritten if /// they use a flat address space pointer. 
/// @@ -1836,7 +1818,6 @@ class TargetTransformInfo::Concept { virtual bool isAlwaysUniform(const Value *V) = 0; virtual bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const = 0; virtual bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const = 0; - virtual unsigned getFlatAddressSpace() = 0; virtual bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes, Intrinsic::ID IID) const = 0; virtual bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const = 0; @@ -2263,8 +2244,6 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept { return Impl.addrspacesMayAlias(AS0, AS1); } - unsigned getFlatAddressSpace() override { return Impl.getFlatAddressSpace(); } - bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes, Intrinsic::ID IID) const override { return Impl.collectFlatAddressOperands(OpIndexes, IID); diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index 90eef93a2a54d5..192a1c15347dc7 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -115,8 +115,6 @@ class TargetTransformInfoImplBase { return true; } - unsigned getFlatAddressSpace() const { return -1; } - bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes, Intrinsic::ID IID) const { return false; diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index 50dc7d5c54c54a..05b0e5844ac5d5 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -292,11 +292,6 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> { return true; } - unsigned getFlatAddressSpace() { - // Return an invalid address space. 
- return -1; - } - bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes, Intrinsic::ID IID) const { return false; diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index 2c26493bd3f1ca..5eb6be7a362cb5 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -305,10 +305,6 @@ bool llvm::TargetTransformInfo::addrspacesMayAlias(unsigned FromAS, return TTIImpl->addrspacesMayAlias(FromAS, ToAS); } -unsigned TargetTransformInfo::getFlatAddressSpace() const { - return TTIImpl->getFlatAddressSpace(); -} - bool TargetTransformInfo::collectFlatAddressOperands( SmallVectorImpl<int> &OpIndexes, Intrinsic::ID IID) const { return TTIImpl->collectFlatAddressOperands(OpIndexes, IID); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h index 01df2e6caaba1d..6f32e439231273 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h @@ -200,14 +200,6 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> { return AMDGPU::addrspacesMayAlias(AS0, AS1); } - unsigned getFlatAddressSpace() const { - // Don't bother running InferAddressSpaces pass on graphics shaders which - // don't use flat addressing. 
- if (IsGraphics) - return -1; - return AMDGPUAS::FLAT_ADDRESS; - } - bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes, Intrinsic::ID IID) const; diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h index 4160f5f6bfae76..fd03b565ccf917 100644 --- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h +++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h @@ -45,10 +45,6 @@ class NVPTXTTIImpl : public BasicTTIImplBase<NVPTXTTIImpl> { bool isSourceOfDivergence(const Value *V); - unsigned getFlatAddressSpace() const { - return AddressSpace::ADDRESS_SPACE_GENERIC; - } - bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const { return AS != AddressSpace::ADDRESS_SPACE_SHARED && AS != AddressSpace::ADDRESS_SPACE_LOCAL && AS != ADDRESS_SPACE_PARAM; diff --git a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp index 566cdc51f6e74a..6a40d661035a9b 100644 --- a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp +++ b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp @@ -854,7 +854,7 @@ bool InferAddressSpacesImpl::run(Function &CurFn) { FlatAddrSpace = 0; if (FlatAddrSpace == UninitializedAddressSpace) { - FlatAddrSpace = TTI->getFlatAddressSpace(); + FlatAddrSpace = DL->getFlatAddressSpace(); if (FlatAddrSpace == UninitializedAddressSpace) return false; } >From 30c83b88205b5ab391c2f94194fbb0b5ce6f5f50 Mon Sep 17 00:00:00 2001 From: Shilei Tian <i...@tianshilei.me> Date: Thu, 12 Sep 2024 15:25:43 -0400 Subject: [PATCH 3/3] [Attributor] Use more appropriate approach to check flat address space --- llvm/include/llvm/Transforms/IPO/Attributor.h | 6 ++--- llvm/lib/Transforms/IPO/Attributor.cpp | 1 + .../Transforms/IPO/AttributorAttributes.cpp | 26 ++++++++++++++----- .../Attributor/address_space_info.ll | 4 ++- .../test/Transforms/Attributor/nocapture-1.ll | 4 +-- .../Transforms/Attributor/value-simplify.ll | 3 +-- 6 files changed, 29 
insertions(+), 15 deletions(-) diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h index 921fe945539510..d53a9810eb1df6 100644 --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -1332,7 +1332,7 @@ struct InformationCache { bool stackIsAccessibleByOtherThreads() { return !targetIsGPU(); } /// Return true if the target is a GPU. - bool targetIsGPU() { + bool targetIsGPU() const { return TargetTriple.isAMDGPU() || TargetTriple.isNVPTX(); } @@ -6267,8 +6267,8 @@ struct AAAddressSpace : public StateWrapper<BooleanState, AbstractAttribute> { return (AA->getIdAddr() == &ID); } - // No address space which indicates the associated value is dead. - static const uint32_t NoAddressSpace = ~0U; + // Invalid address space which indicates the associated value is dead. + static const uint32_t InvalidAddressSpace = ~0U; /// Unique ID (due to the unique address) static const char ID; diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp index 56d1133b25549a..7339b00eb78868 100644 --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -26,6 +26,7 @@ #include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/MustExecute.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/AttributeMask.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Constant.h" diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index 217c7cccb5775a..72ac08ec2b6e1c 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -12571,8 +12571,19 @@ struct AAAddressSpaceImpl : public AAAddressSpace { void initialize(Attributor &A) override { assert(getAssociatedType()->isPtrOrPtrVectorTy() && "Associated value is not a pointer"); - if 
(getAssociatedType()->getPointerAddressSpace()) + + unsigned FlatAS = A.getInfoCache().getDL().getFlatAddressSpace(); + if (FlatAS == InvalidAddressSpace) { + indicatePessimisticFixpoint(); + return; + } + + unsigned AS = getAssociatedType()->getPointerAddressSpace(); + if (AS != FlatAS) { + [[maybe_unused]] bool R = takeAddressSpace(AS); + assert(R && "The take should happen"); indicateOptimisticFixpoint(); + } } ChangeStatus updateImpl(Attributor &A) override { @@ -12594,12 +12605,13 @@ struct AAAddressSpaceImpl : public AAAddressSpace { /// See AbstractAttribute::manifest(...). ChangeStatus manifest(Attributor &A) override { - Value *AssociatedValue = &getAssociatedValue(); - Value *OriginalValue = peelAddrspacecast(AssociatedValue); - if (getAddressSpace() == NoAddressSpace || + if (getAddressSpace() == InvalidAddressSpace || getAddressSpace() == getAssociatedType()->getPointerAddressSpace()) return ChangeStatus::UNCHANGED; + Value *AssociatedValue = &getAssociatedValue(); + Value *OriginalValue = peelAddrspacecast(AssociatedValue); + PointerType *NewPtrTy = PointerType::get(getAssociatedType()->getContext(), getAddressSpace()); bool UseOriginalValue = @@ -12646,17 +12658,17 @@ struct AAAddressSpaceImpl : public AAAddressSpace { if (!isValidState()) return "addrspace(<invalid>)"; return "addrspace(" + - (AssumedAddressSpace == NoAddressSpace + (AssumedAddressSpace == InvalidAddressSpace ? 
"none" : std::to_string(AssumedAddressSpace)) + ")"; } private: - uint32_t AssumedAddressSpace = NoAddressSpace; + uint32_t AssumedAddressSpace = InvalidAddressSpace; bool takeAddressSpace(uint32_t AS) { - if (AssumedAddressSpace == NoAddressSpace) { + if (AssumedAddressSpace == InvalidAddressSpace) { AssumedAddressSpace = AS; return true; } diff --git a/llvm/test/Transforms/Attributor/address_space_info.ll b/llvm/test/Transforms/Attributor/address_space_info.ll index 73dd93c55b819b..0c8b06ac6666a4 100644 --- a/llvm/test/Transforms/Attributor/address_space_info.ll +++ b/llvm/test/Transforms/Attributor/address_space_info.ll @@ -1,5 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals --prefix-filecheck-ir-name true -; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK +; RUN: opt -mtriple=amdgcn-amd-amdhsa -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefix=CHECK + +; REQUIRES: amdgpu-registered-target @dst = dso_local addrspace(1) externally_initialized global i32 0, align 4 @g1 = dso_local addrspace(1) externally_initialized global ptr null, align 4 diff --git a/llvm/test/Transforms/Attributor/nocapture-1.ll b/llvm/test/Transforms/Attributor/nocapture-1.ll index 3401ddfdd7d758..de5f31e470edfc 100644 --- a/llvm/test/Transforms/Attributor/nocapture-1.ll +++ b/llvm/test/Transforms/Attributor/nocapture-1.ll @@ -257,7 +257,7 @@ define i32 @nc1_addrspace(ptr %q, ptr addrspace(1) %p, i1 %b) { ; TUNIT-NEXT: [[TMP:%.*]] = addrspacecast ptr addrspace(1) [[P]] to ptr ; TUNIT-NEXT: [[TMP2:%.*]] = select i1 [[B]], ptr [[TMP]], ptr [[Q]] ; TUNIT-NEXT: [[VAL:%.*]] = load i32, ptr [[TMP2]], align 4 -; TUNIT-NEXT: store i32 0, ptr addrspace(1) [[P]], align 4 +; TUNIT-NEXT: store i32 0, ptr [[TMP]], 
align 4 ; TUNIT-NEXT: store ptr [[Q]], ptr @g, align 8 ; TUNIT-NEXT: ret i32 [[VAL]] ; @@ -272,7 +272,7 @@ define i32 @nc1_addrspace(ptr %q, ptr addrspace(1) %p, i1 %b) { ; CGSCC-NEXT: [[TMP:%.*]] = addrspacecast ptr addrspace(1) [[P]] to ptr ; CGSCC-NEXT: [[TMP2:%.*]] = select i1 [[B]], ptr [[TMP]], ptr [[Q]] ; CGSCC-NEXT: [[VAL:%.*]] = load i32, ptr [[TMP2]], align 4 -; CGSCC-NEXT: store i32 0, ptr addrspace(1) [[P]], align 4 +; CGSCC-NEXT: store i32 0, ptr [[TMP]], align 4 ; CGSCC-NEXT: store ptr [[Q]], ptr @g, align 8 ; CGSCC-NEXT: ret i32 [[VAL]] ; diff --git a/llvm/test/Transforms/Attributor/value-simplify.ll b/llvm/test/Transforms/Attributor/value-simplify.ll index 68f179c88116e4..a5789790cc92a1 100644 --- a/llvm/test/Transforms/Attributor/value-simplify.ll +++ b/llvm/test/Transforms/Attributor/value-simplify.ll @@ -838,8 +838,7 @@ define void @user() { ; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write) ; TUNIT-LABEL: define {{[^@]+}}@user ; TUNIT-SAME: () #[[ATTR5]] { -; TUNIT-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspacecast (ptr addrspace(3) @ConstAS3Ptr to ptr) to ptr addrspace(3) -; TUNIT-NEXT: store i32 0, ptr addrspace(3) [[TMP1]], align 4 +; TUNIT-NEXT: store i32 0, ptr addrspacecast (ptr addrspace(3) @ConstAS3Ptr to ptr), align 4 ; TUNIT-NEXT: ret void ; ; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(write) _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits