================
@@ -0,0 +1,258 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -o - -mcpu=sm_90 -march=nvptx64 -mattr=+ptx80 | FileCheck %s
+; RUN: %if ptxas-12.0 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx80| %ptxas-verify -arch=sm_90 %}
+
+target triple = "nvptx64-nvidia-cuda"
+
+@llvm.used = appending global [1 x ptr] [ptr @test_distributed_shared_cluster], section "llvm.metadata"
+
+declare ptr addrspace(7) @llvm.nvvm.mapa.shared.cluster(ptr addrspace(3), i32)
+declare i1 @llvm.nvvm.isspacep.shared.cluster(ptr)
+declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
+declare ptr @llvm.nvvm.mapa(ptr, i32)
+
+define i32 @test_distributed_shared_cluster(ptr %ptr, ptr addrspace(3) %smem_ptr) local_unnamed_addr {
+; CHECK-LABEL: test_distributed_shared_cluster(
+; CHECK:       {
+; CHECK-NEXT:    .reg .pred %p<13>;
+; CHECK-NEXT:    .reg .b16 %rs<5>;
+; CHECK-NEXT:    .reg .b32 %r<69>;
+; CHECK-NEXT:    .reg .f32 %f<2>;
+; CHECK-NEXT:    .reg .b64 %rd<24>;
+; CHECK-NEXT:    .reg .f64 %fd<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0: // %entry
+; CHECK-NEXT:    ld.param.u64 %rd2, [test_distributed_shared_cluster_param_0];
+; CHECK-NEXT:    ld.param.u64 %rd3, [test_distributed_shared_cluster_param_1];
+; CHECK-NEXT:    mov.u32 %r24, %ctaid.x;
+; CHECK-NEXT:    xor.b32 %r25, %r24, 1;
+; CHECK-NEXT:    isspacep.shared::cluster %p1, %rd2;
+; CHECK-NEXT:    mapa.u64 %rd4, %rd2, %r25;
+; CHECK-NEXT:    isspacep.shared::cluster %p2, %rd4;
+; CHECK-NEXT:    mapa.shared::cluster.u64 %rd5, %rd3, %r25;
+; CHECK-NEXT:    mov.b16 %rs1, 0x3C00;
+; CHECK-NEXT:    atom.shared::cluster.add.noftz.f16 %rs2, [%rd5], %rs1;
+; CHECK-NEXT:    mov.b16 %rs3, 0x3F80;
+; CHECK-NEXT:    atom.shared::cluster.add.noftz.bf16 %rs4, [%rd5], %rs3;
+; CHECK-NEXT:    atom.shared::cluster.add.f32 %f1, [%rd5], 0f3F800000;
+; CHECK-NEXT:    atom.shared::cluster.add.f64 %fd1, [%rd5], 0d3FF0000000000000;
+; CHECK-NEXT:    atom.shared::cluster.add.u32 %r26, [%rd5], 1;
+; CHECK-NEXT:    atom.shared::cluster.add.u64 %rd6, [%rd5], 1;
+; CHECK-NEXT:    atom.shared::cluster.exch.b32 %r27, [%rd5], 1;
+; CHECK-NEXT:    atom.shared::cluster.exch.b64 %rd7, [%rd5], 1;
+; CHECK-NEXT:    atom.shared::cluster.min.s32 %r28, [%rd5], 1;
+; CHECK-NEXT:    atom.shared::cluster.min.s64 %rd8, [%rd5], 1;
+; CHECK-NEXT:    atom.shared::cluster.min.u32 %r29, [%rd5], 1;
+; CHECK-NEXT:    atom.shared::cluster.min.u64 %rd9, [%rd5], 1;
+; CHECK-NEXT:    atom.shared::cluster.max.s32 %r30, [%rd5], 1;
+; CHECK-NEXT:    atom.shared::cluster.max.s64 %rd10, [%rd5], 1;
+; CHECK-NEXT:    atom.shared::cluster.max.u32 %r31, [%rd5], 1;
+; CHECK-NEXT:    atom.shared::cluster.max.u64 %rd11, [%rd5], 1;
+; CHECK-NEXT:    atom.shared::cluster.inc.u32 %r32, [%rd5], 1;
+; CHECK-NEXT:    atom.shared::cluster.dec.u32 %r33, [%rd5], 1;
+; CHECK-NEXT:    atom.shared::cluster.and.b32 %r34, [%rd5], 1;
+; CHECK-NEXT:    atom.shared::cluster.and.b64 %rd12, [%rd5], 1;
+; CHECK-NEXT:    atom.shared::cluster.or.b32 %r35, [%rd5], 1;
+; CHECK-NEXT:    atom.shared::cluster.or.b64 %rd13, [%rd5], 1;
+; CHECK-NEXT:    atom.shared::cluster.xor.b32 %r36, [%rd5], 1;
+; CHECK-NEXT:    atom.shared::cluster.xor.b64 %rd14, [%rd5], 1;
+; CHECK-NEXT:    atom.relaxed.shared::cluster.cas.b32 %r37, [%rd5], 1, 0;
+; CHECK-NEXT:    atom.acquire.shared::cluster.cas.b32 %r38, [%rd5], 1, 0;
+; CHECK-NEXT:    atom.acquire.shared::cluster.cas.b32 %r39, [%rd5], 1, 0;
+; CHECK-NEXT:    atom.release.shared::cluster.cas.b32 %r40, [%rd5], 1, 0;
+; CHECK-NEXT:    atom.acq_rel.shared::cluster.cas.b32 %r41, [%rd5], 1, 0;
+; CHECK-NEXT:    atom.acq_rel.shared::cluster.cas.b32 %r42, [%rd5], 1, 0;
+; CHECK-NEXT:    fence.sc.sys;
+; CHECK-NEXT:    atom.acquire.shared::cluster.cas.b32 %r43, [%rd5], 1, 0;
+; CHECK-NEXT:    fence.sc.sys;
+; CHECK-NEXT:    atom.acquire.shared::cluster.cas.b32 %r44, [%rd5], 1, 0;
+; CHECK-NEXT:    fence.sc.sys;
+; CHECK-NEXT:    atom.acquire.shared::cluster.cas.b32 %r45, [%rd5], 1, 0;
+; CHECK-NEXT:    atom.relaxed.shared::cluster.cas.b64 %rd15, [%rd5], 1, 0;
+; CHECK-NEXT:    atom.acquire.shared::cluster.cas.b64 %rd16, [%rd5], 1, 0;
+; CHECK-NEXT:    atom.acquire.shared::cluster.cas.b64 %rd17, [%rd5], 1, 0;
+; CHECK-NEXT:    atom.release.shared::cluster.cas.b64 %rd18, [%rd5], 1, 0;
+; CHECK-NEXT:    atom.acq_rel.shared::cluster.cas.b64 %rd19, [%rd5], 1, 0;
+; CHECK-NEXT:    atom.acq_rel.shared::cluster.cas.b64 %rd20, [%rd5], 1, 0;
+; CHECK-NEXT:    fence.sc.sys;
+; CHECK-NEXT:    atom.acquire.shared::cluster.cas.b64 %rd21, [%rd5], 1, 0;
+; CHECK-NEXT:    fence.sc.sys;
+; CHECK-NEXT:    atom.acquire.shared::cluster.cas.b64 %rd22, [%rd5], 1, 0;
+; CHECK-NEXT:    fence.sc.sys;
+; CHECK-NEXT:    atom.acquire.shared::cluster.cas.b64 %rd23, [%rd5], 1, 0;
+; CHECK-NEXT:    and.b64 %rd1, %rd5, -4;
+; CHECK-NEXT:    cvt.u32.u64 %r46, %rd5;
+; CHECK-NEXT:    and.b32 %r47, %r46, 3;
+; CHECK-NEXT:    shl.b32 %r1, %r47, 3;
+; CHECK-NEXT:    mov.b32 %r48, 65535;
+; CHECK-NEXT:    shl.b32 %r49, %r48, %r1;
+; CHECK-NEXT:    not.b32 %r2, %r49;
+; CHECK-NEXT:    mov.b32 %r50, 1;
+; CHECK-NEXT:    shl.b32 %r3, %r50, %r1;
+; CHECK-NEXT:    ld.shared::cluster.u32 %r51, [%rd1];
+; CHECK-NEXT:    and.b32 %r64, %r51, %r2;
+; CHECK-NEXT:  $L__BB0_1: // %partword.cmpxchg.loop33
+; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    or.b32 %r52, %r64, %r3;
+; CHECK-NEXT:    atom.relaxed.shared::cluster.cas.b32 %r6, [%rd1], %r52, %r64;
+; CHECK-NEXT:    setp.eq.s32 %p3, %r6, %r52;
+; CHECK-NEXT:    @%p3 bra $L__BB0_3;
+; CHECK-NEXT:  // %bb.2: // %partword.cmpxchg.failure32
+; CHECK-NEXT:    // in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT:    and.b32 %r7, %r6, %r2;
+; CHECK-NEXT:    setp.ne.s32 %p4, %r64, %r7;
+; CHECK-NEXT:    mov.b32 %r64, %r7;
+; CHECK-NEXT:    @%p4 bra $L__BB0_1;
+; CHECK-NEXT:  $L__BB0_3: // %partword.cmpxchg.end31
+; CHECK-NEXT:    ld.shared::cluster.u32 %r53, [%rd1];
+; CHECK-NEXT:    and.b32 %r65, %r53, %r2;
+; CHECK-NEXT:  $L__BB0_4: // %partword.cmpxchg.loop23
+; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    or.b32 %r54, %r65, %r3;
+; CHECK-NEXT:    atom.relaxed.shared::cluster.cas.b32 %r10, [%rd1], %r54, %r65;
+; CHECK-NEXT:    setp.eq.s32 %p5, %r10, %r54;
+; CHECK-NEXT:    @%p5 bra $L__BB0_6;
+; CHECK-NEXT:  // %bb.5: // %partword.cmpxchg.failure22
+; CHECK-NEXT:    // in Loop: Header=BB0_4 Depth=1
+; CHECK-NEXT:    and.b32 %r11, %r10, %r2;
+; CHECK-NEXT:    setp.ne.s32 %p6, %r65, %r11;
+; CHECK-NEXT:    mov.b32 %r65, %r11;
+; CHECK-NEXT:    @%p6 bra $L__BB0_4;
+; CHECK-NEXT:  $L__BB0_6: // %partword.cmpxchg.end21
+; CHECK-NEXT:    fence.acq_rel.sys;
+; CHECK-NEXT:    fence.acq_rel.sys;
+; CHECK-NEXT:    ld.shared::cluster.u32 %r55, [%rd1];
+; CHECK-NEXT:    and.b32 %r66, %r55, %r2;
+; CHECK-NEXT:  $L__BB0_7: // %partword.cmpxchg.loop13
+; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    or.b32 %r56, %r66, %r3;
+; CHECK-NEXT:    atom.relaxed.shared::cluster.cas.b32 %r14, [%rd1], %r56, %r66;
+; CHECK-NEXT:    setp.eq.s32 %p7, %r14, %r56;
+; CHECK-NEXT:    @%p7 bra $L__BB0_9;
+; CHECK-NEXT:  // %bb.8: // %partword.cmpxchg.failure12
+; CHECK-NEXT:    // in Loop: Header=BB0_7 Depth=1
+; CHECK-NEXT:    and.b32 %r15, %r14, %r2;
+; CHECK-NEXT:    setp.ne.s32 %p8, %r66, %r15;
+; CHECK-NEXT:    mov.b32 %r66, %r15;
+; CHECK-NEXT:    @%p8 bra $L__BB0_7;
+; CHECK-NEXT:  $L__BB0_9: // %partword.cmpxchg.end11
+; CHECK-NEXT:    fence.acq_rel.sys;
+; CHECK-NEXT:    ld.shared::cluster.u32 %r57, [%rd1];
+; CHECK-NEXT:    and.b32 %r67, %r57, %r2;
+; CHECK-NEXT:  $L__BB0_10: // %partword.cmpxchg.loop3
+; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    or.b32 %r58, %r67, %r3;
+; CHECK-NEXT:    atom.relaxed.shared::cluster.cas.b32 %r18, [%rd1], %r58, %r67;
+; CHECK-NEXT:    setp.eq.s32 %p9, %r18, %r58;
+; CHECK-NEXT:    @%p9 bra $L__BB0_12;
+; CHECK-NEXT:  // %bb.11: // %partword.cmpxchg.failure2
+; CHECK-NEXT:    // in Loop: Header=BB0_10 Depth=1
+; CHECK-NEXT:    and.b32 %r19, %r18, %r2;
+; CHECK-NEXT:    setp.ne.s32 %p10, %r67, %r19;
+; CHECK-NEXT:    mov.b32 %r67, %r19;
+; CHECK-NEXT:    @%p10 bra $L__BB0_10;
+; CHECK-NEXT:  $L__BB0_12: // %partword.cmpxchg.end1
+; CHECK-NEXT:    fence.acq_rel.sys;
+; CHECK-NEXT:    fence.sc.sys;
+; CHECK-NEXT:    ld.shared::cluster.u32 %r59, [%rd1];
+; CHECK-NEXT:    and.b32 %r68, %r59, %r2;
+; CHECK-NEXT:  $L__BB0_13: // %partword.cmpxchg.loop
+; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    or.b32 %r60, %r68, %r3;
+; CHECK-NEXT:    atom.relaxed.shared::cluster.cas.b32 %r22, [%rd1], %r60, %r68;
+; CHECK-NEXT:    setp.eq.s32 %p11, %r22, %r60;
+; CHECK-NEXT:    @%p11 bra $L__BB0_15;
+; CHECK-NEXT:  // %bb.14: // %partword.cmpxchg.failure
+; CHECK-NEXT:    // in Loop: Header=BB0_13 Depth=1
+; CHECK-NEXT:    and.b32 %r23, %r22, %r2;
+; CHECK-NEXT:    setp.ne.s32 %p12, %r68, %r23;
+; CHECK-NEXT:    mov.b32 %r68, %r23;
+; CHECK-NEXT:    @%p12 bra $L__BB0_13;
+; CHECK-NEXT:  $L__BB0_15: // %partword.cmpxchg.end
+; CHECK-NEXT:    fence.acq_rel.sys;
+; CHECK-NEXT:    selp.b32 %r61, 1, 0, %p1;
+; CHECK-NEXT:    selp.b32 %r62, 1, 0, %p2;
+; CHECK-NEXT:    add.s32 %r63, %r61, %r62;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r63;
+; CHECK-NEXT:    ret;
+entry:
----------------
modiking wrote:
Good call, added

https://github.com/llvm/llvm-project/pull/135444
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to