================ @@ -0,0 +1,258 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -o - -mcpu=sm_90 -march=nvptx64 -mattr=+ptx80 | FileCheck %s +; RUN: %if ptxas-12.0 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx80| %ptxas-verify -arch=sm_90 %} + +target triple = "nvptx64-nvidia-cuda" + +@llvm.used = appending global [1 x ptr] [ptr @test_distributed_shared_cluster], section "llvm.metadata" + +declare ptr addrspace(7) @llvm.nvvm.mapa.shared.cluster(ptr addrspace(3), i32) +declare i1 @llvm.nvvm.isspacep.shared.cluster(ptr) +declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() +declare ptr @llvm.nvvm.mapa(ptr, i32) + +define i32 @test_distributed_shared_cluster(ptr %ptr, ptr addrspace(3) %smem_ptr) local_unnamed_addr { ---------------- AlexMaclean wrote:
Please break this up into more than one big function. Perhaps group some of the atomic cases together but otherwise each instruction/intrinsic should have its own function. This will make the test easier to debug if something starts failing at some point. https://github.com/llvm/llvm-project/pull/135444 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits