llvmbot wrote:
@llvm/pr-subscribers-backend-amdgpu

Author: None (macurtis-amd)

<details>
<summary>Changes</summary>

---

Patch is 400.54 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/162575.diff

15 Files Affected:

- (modified) clang/docs/HIPSupport.rst (+2)
- (modified) clang/docs/LanguageExtensions.rst (+1)
- (modified) clang/include/clang/Basic/SyncScope.h (+32-10)
- (modified) clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp (+15-14)
- (modified) clang/lib/CodeGen/Targets/AMDGPU.cpp (+4)
- (modified) clang/lib/CodeGen/Targets/SPIR.cpp (+2)
- (modified) clang/lib/Frontend/InitPreprocessor.cpp (+2)
- (modified) clang/test/CodeGen/scoped-atomic-ops.c (+4293-190)
- (modified) clang/test/CodeGen/scoped-fence-ops.c (+28-13)
- (modified) clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl (+41-18)
- (modified) clang/test/Preprocessor/init-aarch64.c (+2)
- (modified) clang/test/Preprocessor/init-loongarch.c (+12-10)
- (modified) clang/test/Preprocessor/init.c (+11-6)
- (modified) clang/test/SemaCUDA/atomic-ops.cu (+7-2)
- (modified) clang/test/SemaCUDA/spirv-amdgcn-atomic-ops.cu (+7-2)

``````````diff
diff --git a/clang/docs/HIPSupport.rst b/clang/docs/HIPSupport.rst
index b4a671e3cfa3c..ec2af2a6f569d 100644
--- a/clang/docs/HIPSupport.rst
+++ b/clang/docs/HIPSupport.rst
@@ -164,6 +164,8 @@ Predefined Macros
      - Represents wavefront memory scope in HIP (value is 2).
    * - ``__HIP_MEMORY_SCOPE_WORKGROUP``
      - Represents workgroup memory scope in HIP (value is 3).
+   * - ``__HIP_MEMORY_SCOPE_CLUSTER``
+     - Represents cluster memory scope in HIP (value is 6).
    * - ``__HIP_MEMORY_SCOPE_AGENT``
      - Represents agent memory scope in HIP (value is 4).
    * - ``__HIP_MEMORY_SCOPE_SYSTEM``
diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index 6bb99c757cd19..bef6e9c14b182 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -4846,6 +4846,7 @@ currently supported:
 * ``__MEMORY_SCOPE_SYSTEM``
 * ``__MEMORY_SCOPE_DEVICE``
 * ``__MEMORY_SCOPE_WRKGRP``
+* ``__MEMORY_SCOPE_CLUSTR``
 * ``__MEMORY_SCOPE_WVFRNT``
 * ``__MEMORY_SCOPE_SINGLE``
diff --git a/clang/include/clang/Basic/SyncScope.h b/clang/include/clang/Basic/SyncScope.h
index 5a8d2a7dd02e5..614e5faa78696 100644
--- a/clang/include/clang/Basic/SyncScope.h
+++ b/clang/include/clang/Basic/SyncScope.h
@@ -43,11 +43,13 @@ enum class SyncScope {
   SystemScope,
   DeviceScope,
   WorkgroupScope,
+  ClusterScope,
   WavefrontScope,
   SingleScope,
   HIPSingleThread,
   HIPWavefront,
   HIPWorkgroup,
+  HIPCluster,
   HIPAgent,
   HIPSystem,
   OpenCLWorkGroup,
@@ -65,6 +67,8 @@ inline llvm::StringRef getAsString(SyncScope S) {
     return "device_scope";
   case SyncScope::WorkgroupScope:
     return "workgroup_scope";
+  case SyncScope::ClusterScope:
+    return "cluster_scope";
   case SyncScope::WavefrontScope:
     return "wavefront_scope";
   case SyncScope::SingleScope:
@@ -75,6 +79,8 @@ inline llvm::StringRef getAsString(SyncScope S) {
     return "hip_wavefront";
   case SyncScope::HIPWorkgroup:
     return "hip_workgroup";
+  case SyncScope::HIPCluster:
+    return "hip_cluster";
   case SyncScope::HIPAgent:
     return "hip_agent";
   case SyncScope::HIPSystem:
@@ -180,7 +186,10 @@ class AtomicScopeHIPModel : public AtomicScopeModel {
     Workgroup = 3,
     Agent = 4,
     System = 5,
-    Last = System
+    Cluster = 6,
+    End,
+    Last = End - 1,
+    Count = Last
   };
 
   AtomicScopeHIPModel() {}
@@ -193,10 +202,14 @@ class AtomicScopeHIPModel : public AtomicScopeModel {
       return SyncScope::HIPWavefront;
     case Workgroup:
       return SyncScope::HIPWorkgroup;
+    case Cluster:
+      return SyncScope::HIPCluster;
     case Agent:
       return SyncScope::HIPAgent;
     case System:
       return SyncScope::HIPSystem;
+    case End:
+      break;
     }
     llvm_unreachable("Invalid language sync scope value");
   }
@@ -207,11 +220,12 @@ class AtomicScopeHIPModel : public AtomicScopeModel {
   }
 
   ArrayRef<unsigned> getRuntimeValues() const override {
-    static_assert(Last == System, "Does not include all sync scopes");
     static const unsigned Scopes[] = {
         static_cast<unsigned>(SingleThread), static_cast<unsigned>(Wavefront),
-        static_cast<unsigned>(Workgroup), static_cast<unsigned>(Agent),
-        static_cast<unsigned>(System)};
+        static_cast<unsigned>(Workgroup), static_cast<unsigned>(Cluster),
+        static_cast<unsigned>(System), static_cast<unsigned>(Agent)};
+    static_assert(sizeof(Scopes) / sizeof(Scopes[0]) == Count,
+                  "Does not include all sync scopes");
     return llvm::ArrayRef(Scopes);
   }
 
@@ -223,14 +237,17 @@ class AtomicScopeHIPModel : public AtomicScopeModel {
 /// Defines the generic atomic scope model.
 class AtomicScopeGenericModel : public AtomicScopeModel {
 public:
-  /// The enum values match predefined built-in macros __ATOMIC_SCOPE_*.
+  /// The enum values match predefined built-in macros __MEMORY_SCOPE_*.
   enum ID {
     System = 0,
     Device = 1,
     Workgroup = 2,
     Wavefront = 3,
     Single = 4,
-    Last = Single
+    Cluster = 5,
+    End,
+    Last = End - 1,
+    Count = End
   };
 
   AtomicScopeGenericModel() = default;
@@ -243,10 +260,14 @@ class AtomicScopeGenericModel : public AtomicScopeModel {
       return SyncScope::SystemScope;
     case Workgroup:
       return SyncScope::WorkgroupScope;
+    case Cluster:
+      return SyncScope::ClusterScope;
     case Wavefront:
       return SyncScope::WavefrontScope;
     case Single:
       return SyncScope::SingleScope;
+    case End:
+      break;
     }
     llvm_unreachable("Invalid language sync scope value");
   }
@@ -256,11 +277,12 @@ class AtomicScopeGenericModel : public AtomicScopeModel {
   }
 
   ArrayRef<unsigned> getRuntimeValues() const override {
-    static_assert(Last == Single, "Does not include all sync scopes");
     static const unsigned Scopes[] = {
-        static_cast<unsigned>(Device), static_cast<unsigned>(System),
-        static_cast<unsigned>(Workgroup), static_cast<unsigned>(Wavefront),
-        static_cast<unsigned>(Single)};
+        static_cast<unsigned>(System), static_cast<unsigned>(Device),
+        static_cast<unsigned>(Workgroup), static_cast<unsigned>(Cluster),
+        static_cast<unsigned>(Wavefront), static_cast<unsigned>(Single)};
+    static_assert(sizeof(Scopes) / sizeof(Scopes[0]) == Count,
+                  "Does not include all sync scopes");
     return llvm::ArrayRef(Scopes);
   }
diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
index 6596ec06199dc..97b5828011cd4 100644
--- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
@@ -11,6 +11,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "CGBuiltin.h"
+#include "clang/Basic/SyncScope.h"
 #include "clang/Basic/TargetBuiltins.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/IntrinsicsAMDGPU.h"
@@ -242,33 +243,33 @@ void CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope,
   }
 
   // Older builtins had an enum argument for the memory scope.
+  const char *ssn = nullptr;
   int scope = cast<llvm::ConstantInt>(Scope)->getZExtValue();
   switch (scope) {
-  case 0: // __MEMORY_SCOPE_SYSTEM
+  case AtomicScopeGenericModel::System: // __MEMORY_SCOPE_SYSTEM
     SSID = llvm::SyncScope::System;
     break;
-  case 1: // __MEMORY_SCOPE_DEVICE
-    if (getTarget().getTriple().isSPIRV())
-      SSID = getLLVMContext().getOrInsertSyncScopeID("device");
-    else
-      SSID = getLLVMContext().getOrInsertSyncScopeID("agent");
+  case AtomicScopeGenericModel::Device: // __MEMORY_SCOPE_DEVICE
+    ssn = getTarget().getTriple().isSPIRV() ? "device" : "agent";
     break;
-  case 2: // __MEMORY_SCOPE_WRKGRP
-    SSID = getLLVMContext().getOrInsertSyncScopeID("workgroup");
+  case AtomicScopeGenericModel::Workgroup: // __MEMORY_SCOPE_WRKGRP
+    ssn = "workgroup";
     break;
-  case 3: // __MEMORY_SCOPE_WVFRNT
-    if (getTarget().getTriple().isSPIRV())
-      SSID = getLLVMContext().getOrInsertSyncScopeID("subgroup");
-    else
-      SSID = getLLVMContext().getOrInsertSyncScopeID("wavefront");
+  case AtomicScopeGenericModel::Cluster: // __MEMORY_SCOPE_CLUSTR
+    ssn = getTarget().getTriple().isSPIRV() ? "workgroup" : "cluster";
+    break;
+  case AtomicScopeGenericModel::Wavefront: // __MEMORY_SCOPE_WVFRNT
+    ssn = getTarget().getTriple().isSPIRV() ? "subgroup" : "wavefront";
     break;
-  case 4: // __MEMORY_SCOPE_SINGLE
+  case AtomicScopeGenericModel::Single: // __MEMORY_SCOPE_SINGLE
     SSID = llvm::SyncScope::SingleThread;
     break;
   default:
     SSID = llvm::SyncScope::System;
     break;
   }
+  if (ssn)
+    SSID = getLLVMContext().getOrInsertSyncScopeID(ssn);
 }
 
 llvm::Value *CodeGenFunction::EmitScalarOrConstFoldImmArg(unsigned ICEArguments,
diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp
index 0fcbf7e458a34..c74a1b6098922 100644
--- a/clang/lib/CodeGen/Targets/AMDGPU.cpp
+++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp
@@ -488,6 +488,10 @@ AMDGPUTargetCodeGenInfo::getLLVMSyncScopeID(const LangOptions &LangOpts,
   case SyncScope::WavefrontScope:
     Name = "wavefront";
     break;
+  case SyncScope::HIPCluster:
+  case SyncScope::ClusterScope:
+    Name = "cluster";
+    break;
   case SyncScope::HIPWorkgroup:
   case SyncScope::OpenCLWorkGroup:
   case SyncScope::WorkgroupScope:
diff --git a/clang/lib/CodeGen/Targets/SPIR.cpp b/clang/lib/CodeGen/Targets/SPIR.cpp
index 4aa63143a66cd..fbf29186faf24 100644
--- a/clang/lib/CodeGen/Targets/SPIR.cpp
+++ b/clang/lib/CodeGen/Targets/SPIR.cpp
@@ -90,6 +90,8 @@ inline StringRef mapClangSyncScopeToLLVM(SyncScope Scope) {
   case SyncScope::OpenCLSubGroup:
   case SyncScope::WavefrontScope:
     return "subgroup";
+  case SyncScope::HIPCluster:
+  case SyncScope::ClusterScope:
   case SyncScope::HIPWorkgroup:
   case SyncScope::OpenCLWorkGroup:
   case SyncScope::WorkgroupScope:
diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp
index b899fb9c6494a..21ab9dca8f0bd 100644
--- a/clang/lib/Frontend/InitPreprocessor.cpp
+++ b/clang/lib/Frontend/InitPreprocessor.cpp
@@ -616,6 +616,7 @@ static void InitializeStandardPredefinedMacros(const TargetInfo &TI,
     Builder.defineMacro("__HIP_MEMORY_SCOPE_WORKGROUP", "3");
     Builder.defineMacro("__HIP_MEMORY_SCOPE_AGENT", "4");
     Builder.defineMacro("__HIP_MEMORY_SCOPE_SYSTEM", "5");
+    Builder.defineMacro("__HIP_MEMORY_SCOPE_CLUSTER", "6");
     if (LangOpts.HIPStdPar) {
       Builder.defineMacro("__HIPSTDPAR__");
       if (LangOpts.HIPStdParInterposeAlloc) {
@@ -904,6 +905,7 @@ static void InitializePredefinedMacros(const TargetInfo &TI,
   Builder.defineMacro("__MEMORY_SCOPE_WRKGRP", "2");
   Builder.defineMacro("__MEMORY_SCOPE_WVFRNT", "3");
   Builder.defineMacro("__MEMORY_SCOPE_SINGLE", "4");
+  Builder.defineMacro("__MEMORY_SCOPE_CLUSTR", "5");
 
   // Define macros for the OpenCL memory scope.
   // The values should match AtomicScopeOpenCLModel::ID enum.
diff --git a/clang/test/CodeGen/scoped-atomic-ops.c b/clang/test/CodeGen/scoped-atomic-ops.c
index 545a6c90892c2..c39048120a457 100644
--- a/clang/test/CodeGen/scoped-atomic-ops.c
+++ b/clang/test/CodeGen/scoped-atomic-ops.c
@@ -1,113 +1,772 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
 // RUN: %clang_cc1 %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa -ffreestanding \
-// RUN:   -fvisibility=hidden | FileCheck --check-prefix=AMDGCN %s
+// RUN:   -fvisibility=hidden | FileCheck --check-prefixes=AMDGCN,AMDGCN_CL_DEF %s
 // RUN: %clang_cc1 %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa -ffreestanding \
-// RUN:   -cl-std=CL2.0 -fvisibility=hidden | FileCheck --check-prefix=AMDGCN %s
+// RUN:   -cl-std=CL2.0 -fvisibility=hidden | FileCheck --check-prefixes=AMDGCN,AMDGCN_CL_20 %s
 // RUN: %clang_cc1 %s -emit-llvm -o - -triple=spirv64-unknown-unknown -ffreestanding \
 // RUN:   -fvisibility=hidden | FileCheck --check-prefix=SPIRV %s
 
-// AMDGCN-LABEL: define hidden i32 @fi1a(
-// AMDGCN: [[TMP0:%.*]] = load atomic i32, ptr [[PTR0:.+]] monotonic, align 4
-// AMDGCN: [[TMP1:%.*]] = load atomic i32, ptr [[PTR1:.+]] syncscope("agent") monotonic, align 4
-// AMDGCN: [[TMP2:%.*]] = load atomic i32, ptr [[PTR2:.+]] syncscope("workgroup") monotonic, align 4
-// AMDGCN: [[TMP3:%.*]] = load atomic i32, ptr [[PTR3:.+]] syncscope("wavefront") monotonic, align 4
-// AMDGCN: [[TMP4:%.*]] = load atomic i32, ptr [[PTR4:.+]] syncscope("singlethread") monotonic, align 4
-// SPIRV: define hidden spir_func i32 @fi1a(
-// SPIRV: [[TMP0:%.*]] = load atomic i32, ptr [[PTR0:.+]] monotonic, align 4
-// SPIRV: [[TMP1:%.*]] = load atomic i32, ptr [[PTR1:.+]] syncscope("device") monotonic, align 4
-// SPIRV: [[TMP2:%.*]] = load atomic i32, ptr [[PTR2:.+]] syncscope("workgroup") monotonic, align 4
-// SPIRV: [[TMP3:%.*]] = load atomic i32, ptr [[PTR3:.+]] syncscope("subgroup") monotonic, align 4
-// SPIRV: [[TMP4:%.*]] = load atomic i32, ptr [[PTR4:.+]] syncscope("singlethread") monotonic, align 4
+// AMDGCN_CL_DEF-LABEL: define hidden i32 @fi1a(
+// AMDGCN_CL_DEF-SAME: ptr noundef [[I:%.*]]) #[[ATTR0:[0-9]+]] {
+// AMDGCN_CL_DEF-NEXT: [[ENTRY:.*:]]
+// AMDGCN_CL_DEF-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
+// AMDGCN_CL_DEF-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// AMDGCN_CL_DEF-NEXT: [[V:%.*]] = alloca i32, align 4, addrspace(5)
+// AMDGCN_CL_DEF-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// AMDGCN_CL_DEF-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr
+// AMDGCN_CL_DEF-NEXT: [[V_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V]] to ptr
+// AMDGCN_CL_DEF-NEXT: store ptr [[I]], ptr [[I_ADDR_ASCAST]], align 8
+// AMDGCN_CL_DEF-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
+// AMDGCN_CL_DEF-NEXT: [[TMP1:%.*]] = load atomic i32, ptr [[TMP0]] monotonic, align 4
+// AMDGCN_CL_DEF-NEXT: store i32 [[TMP1]], ptr [[V_ASCAST]], align 4
+// AMDGCN_CL_DEF-NEXT: [[TMP2:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
+// AMDGCN_CL_DEF-NEXT: [[TMP3:%.*]] = load atomic i32, ptr [[TMP2]] syncscope("agent") monotonic, align 4
+// AMDGCN_CL_DEF-NEXT: store i32 [[TMP3]], ptr [[V_ASCAST]], align 4
+// AMDGCN_CL_DEF-NEXT: [[TMP4:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
+// AMDGCN_CL_DEF-NEXT: [[TMP5:%.*]] = load atomic i32, ptr [[TMP4]] syncscope("workgroup") monotonic, align 4
+// AMDGCN_CL_DEF-NEXT: store i32 [[TMP5]], ptr [[V_ASCAST]], align 4
+// AMDGCN_CL_DEF-NEXT: [[TMP6:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
+// AMDGCN_CL_DEF-NEXT: [[TMP7:%.*]] = load atomic i32, ptr [[TMP6]] syncscope("cluster") monotonic, align 4
+// AMDGCN_CL_DEF-NEXT: store i32 [[TMP7]], ptr [[V_ASCAST]], align 4
+// AMDGCN_CL_DEF-NEXT: [[TMP8:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
+// AMDGCN_CL_DEF-NEXT: [[TMP9:%.*]] = load atomic i32, ptr [[TMP8]] syncscope("wavefront") monotonic, align 4
+// AMDGCN_CL_DEF-NEXT: store i32 [[TMP9]], ptr [[V_ASCAST]], align 4
+// AMDGCN_CL_DEF-NEXT: [[TMP10:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
+// AMDGCN_CL_DEF-NEXT: [[TMP11:%.*]] = load atomic i32, ptr [[TMP10]] syncscope("singlethread") monotonic, align 4
+// AMDGCN_CL_DEF-NEXT: store i32 [[TMP11]], ptr [[V_ASCAST]], align 4
+// AMDGCN_CL_DEF-NEXT: [[TMP12:%.*]] = load i32, ptr [[V_ASCAST]], align 4
+// AMDGCN_CL_DEF-NEXT: ret i32 [[TMP12]]
+//
+// AMDGCN_CL_20-LABEL: define hidden i32 @fi1a(
+// AMDGCN_CL_20-SAME: ptr noundef [[I:%.*]]) #[[ATTR0:[0-9]+]] {
+// AMDGCN_CL_20-NEXT: [[ENTRY:.*:]]
+// AMDGCN_CL_20-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
+// AMDGCN_CL_20-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// AMDGCN_CL_20-NEXT: [[V:%.*]] = alloca i32, align 4, addrspace(5)
+// AMDGCN_CL_20-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// AMDGCN_CL_20-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr
+// AMDGCN_CL_20-NEXT: store ptr [[I]], ptr [[I_ADDR_ASCAST]], align 8
+// AMDGCN_CL_20-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
+// AMDGCN_CL_20-NEXT: [[TMP1:%.*]] = load atomic i32, ptr [[TMP0]] monotonic, align 4
+// AMDGCN_CL_20-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[V]], align 4
+// AMDGCN_CL_20-NEXT: [[TMP2:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
+// AMDGCN_CL_20-NEXT: [[TMP3:%.*]] = load atomic i32, ptr [[TMP2]] syncscope("agent") monotonic, align 4
+// AMDGCN_CL_20-NEXT: store i32 [[TMP3]], ptr addrspace(5) [[V]], align 4
+// AMDGCN_CL_20-NEXT: [[TMP4:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
+// AMDGCN_CL_20-NEXT: [[TMP5:%.*]] = load atomic i32, ptr [[TMP4]] syncscope("workgroup") monotonic, align 4
+// AMDGCN_CL_20-NEXT: store i32 [[TMP5]], ptr addrspace(5) [[V]], align 4
+// AMDGCN_CL_20-NEXT: [[TMP6:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
+// AMDGCN_CL_20-NEXT: [[TMP7:%.*]] = load atomic i32, ptr [[TMP6]] syncscope("cluster") monotonic, align 4
+// AMDGCN_CL_20-NEXT: store i32 [[TMP7]], ptr addrspace(5) [[V]], align 4
+// AMDGCN_CL_20-NEXT: [[TMP8:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
+// AMDGCN_CL_20-NEXT: [[TMP9:%.*]] = load atomic i32, ptr [[TMP8]] syncscope("wavefront") monotonic, align 4
+// AMDGCN_CL_20-NEXT: store i32 [[TMP9]], ptr addrspace(5) [[V]], align 4
+// AMDGCN_CL_20-NEXT: [[TMP10:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
+// AMDGCN_CL_20-NEXT: [[TMP11:%.*]] = load atomic i32, ptr [[TMP10]] syncscope("singlethread") monotonic, align 4
+// AMDGCN_CL_20-NEXT: store i32 [[TMP11]], ptr addrspace(5) [[V]], align 4
+// AMDGCN_CL_20-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(5) [[V]], align 4
+// AMDGCN_CL_20-NEXT: ret i32 [[TMP12]]
+//
+// SPIRV-LABEL: define hidden spir_func i32 @fi1a(
+// SPIRV-SAME: ptr noundef [[I:%.*]]) #[[ATTR0:[0-9]+]] {
+// SPIRV-NEXT: [[ENTRY:.*:]]
+// SPIRV-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8
+// SPIRV-NEXT: [[V:%.*]] = alloca i32, align 4
+// SPIRV-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8
+// SPIRV-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8
+// SPIRV-NEXT: [[TMP1:%.*]] = load atomic i32, ptr [[TMP0]] monotonic, align 4
+// SPIRV-NEXT: store i32 [[TMP1]], ptr [[V]], align 4
+// SPIRV-NEXT: [[TMP2:%.*]] = load ptr, ptr [[I_ADDR]], align 8
+// SPIRV-NEXT: [[TMP3:%.*]] = load atomic i32, ptr [[TMP2]] syncscope("device") monotonic, align 4
+// SPIRV-NEXT: store i32 [[TMP3]], ptr [[V]], align 4
+// SPIRV-NEXT: [[TMP4:%.*]] = load ptr, ptr [[I_ADDR]], align 8
+// SPIRV-NEXT: [[TMP5:%.*]] = load atomic i32, ptr [[TMP4]] syncscope("workgroup") monotonic, align 4
+// SPIRV-NEXT: store i32 [[TMP5]], ptr [[V]], align 4
+// SPIRV-NEXT: [[TMP6:%.*]] = load ptr, ptr [[I_ADDR]], align 8
+// SPIRV-NEXT: [[TMP7:%.*]] = load atomic i32, ptr [[TMP6]] syncscope("workgroup") monotonic, align 4
+// SPIRV-NEXT: store i32 [[TMP7]], ptr [[V]], align 4
+// SPIRV-NEXT: [[TMP8:%.*]] = load ptr, ptr [[I_ADDR]], align 8
+// SPIRV-NEXT: [[TMP9:%.*]] = load atomic i32, ptr [[TMP8]] syncscope("subgroup") monotonic, align 4
+// SPIRV-NEXT: store i32 [[TMP9]], ptr [[V]], align 4
+// SPIRV-NEXT: [[TMP10:%.*]] = load ptr, ptr [[I_ADDR]], align 8
+// SPIRV-NEXT: [[TMP11:%.*]] = load atomic i32, ptr [[TMP10]] syncscope("singlethread") monotonic, align 4
+// SPIRV-NEXT: store i32 [[TMP11]], ptr [[V]], align 4
+// SPIRV-NEXT: [[TMP12:%.*]] = load i32, ptr [[V]], align 4
+// SPIRV-NEXT: ret i32 [[TMP12]]
+//
 int fi1a(int *i) {
   int v;
   __scoped_atomic_load(i, &v, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM);
   __scoped_atomic_load(i, &v, __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE);
   __scoped_atomic_load(i, &v, __ATOMIC_RELAXED, __MEMORY_SCOPE_WRKGRP);
+  __scoped_atomic_load(i, &v, __ATOMIC_RELAXED, __MEMORY_SCOPE_CLUSTR);
   __scoped_atomic_load(i, &v, __ATOMIC_RELAXED, __MEMORY_SCOPE_WVFRNT);
   __scoped_atomic_load(i, &v, __ATOMIC_RELAXED, __MEMORY_SCOPE_SINGLE);
   return v;
 }
 
 // AMDGCN-LABEL: define hidden i32 @fi1b(
-// AMDGCN: [[TMP0:%.*]] = load atomic i32, ptr [[PTR0:%.+]] monotonic, align 4
-// AMDGCN: [[TMP1:%.*]] = load atomic i32, ptr [[PTR1:%.+]] syncscope("agent") monotonic, align 4
-// AMDGCN: [[TMP2:%.*]] = load atomic i32, ptr [[PTR2:%.+]] syncscope("workgroup") monotonic, align 4
-// AMDGCN: [[TMP3:%.*]] = load atomic i32, ptr [[PTR3:%.+]] syncscope("wavefront") monotonic, align 4
-// AMDGCN: [[TMP4:%.*]] = load atomic i32, ptr [[PTR4:%.+]] syncscope("singlethread") monotonic, align 4
+// AMDGCN-SAME: ptr noundef [[I:%.*]]) #[[ATTR0:[0-9]+]] {
+// AMDGCN-NEXT: [[ENTRY:.*:]]
+// AMDGCN-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
+// AMDGCN-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// AMDGCN-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4, addrspace(5)
+// AMDGCN-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca i32, align 4, addrspace(5)
+// AMDGCN-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca i32, align 4, addrspace(5)
+// AMDGCN-NEXT: [[ATOMIC_TEMP3:%.*]] = alloca i32, align 4, addrspace(5)
+// AMDGCN-NEXT: [[ATOMIC_... [truncated]
``````````

</details>

https://github.com/llvm/llvm-project/pull/162575
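A minimal caller-side sketch (not part of the patch; the function name is hypothetical) of how the new scope is exercised, mirroring the updated `clang/test/CodeGen/scoped-atomic-ops.c` above. Per the CodeGen changes in this diff, the cluster scope is expected to lower to `syncscope("cluster")` for amdgcn targets and to fall back to the `"workgroup"` sync scope for SPIR-V:

```c
// Hypothetical example, modeled on the updated scoped-atomic-ops.c test.
// __MEMORY_SCOPE_CLUSTR is the new generic scope macro (value 5);
// __HIP_MEMORY_SCOPE_CLUSTER (value 6) is the HIP-level counterpart.
int load_at_cluster_scope(int *i) {
  int v;
  // Relaxed atomic load at cluster scope; on amdgcn-amd-amdhsa this should
  // emit `load atomic i32 ... syncscope("cluster") monotonic`.
  __scoped_atomic_load(i, &v, __ATOMIC_RELAXED, __MEMORY_SCOPE_CLUSTR);
  return v;
}
```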
