This revision was landed with ongoing or failed builds. This revision was automatically updated to reflect the committed changes. Closed by commit rG57aaab3b17f0: [NVPTX] Fix nvvm.match.sync*.i64 intrinsics return type (i64 -> i32) (authored by krisb).
Changed prior to commit: https://reviews.llvm.org/D120499?vs=411416&id=412018#toc Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D120499/new/ https://reviews.llvm.org/D120499 Files: clang/include/clang/Basic/BuiltinsNVPTX.def clang/lib/Headers/__clang_cuda_intrinsics.h clang/test/CodeGen/builtins-nvptx-ptx60.cu llvm/include/llvm/IR/IntrinsicsNVVM.td llvm/lib/Target/NVPTX/NVPTXIntrinsics.td llvm/test/CodeGen/NVPTX/match.ll
Index: llvm/test/CodeGen/NVPTX/match.ll =================================================================== --- llvm/test/CodeGen/NVPTX/match.ll +++ llvm/test/CodeGen/NVPTX/match.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=nvptx64 -mcpu=sm_70 -mattr=+ptx60 | FileCheck %s declare i32 @llvm.nvvm.match.any.sync.i32(i32, i32) -declare i64 @llvm.nvvm.match.any.sync.i64(i32, i64) +declare i32 @llvm.nvvm.match.any.sync.i64(i32, i64) ; CHECK-LABEL: .func{{.*}}match.any.sync.i32 define i32 @match.any.sync.i32(i32 %mask, i32 %value) { @@ -23,26 +23,26 @@ } ; CHECK-LABEL: .func{{.*}}match.any.sync.i64 -define i64 @match.any.sync.i64(i32 %mask, i64 %value) { +define i32 @match.any.sync.i64(i32 %mask, i64 %value) { ; CHECK: ld.param.u32 [[MASK:%r[0-9]+]], [match.any.sync.i64_param_0]; ; CHECK: ld.param.u64 [[VALUE:%rd[0-9]+]], [match.any.sync.i64_param_1]; - ; CHECK: match.any.sync.b64 [[V0:%rd[0-9]+]], [[VALUE]], [[MASK]]; - %v0 = call i64 @llvm.nvvm.match.any.sync.i64(i32 %mask, i64 %value) - ; CHECK: match.any.sync.b64 [[V1:%rd[0-9]+]], [[VALUE]], 1; - %v1 = call i64 @llvm.nvvm.match.any.sync.i64(i32 1, i64 %value) - ; CHECK: match.any.sync.b64 [[V2:%rd[0-9]+]], 2, [[MASK]]; - %v2 = call i64 @llvm.nvvm.match.any.sync.i64(i32 %mask, i64 2) - ; CHECK: match.any.sync.b64 [[V3:%rd[0-9]+]], 4, 3; - %v3 = call i64 @llvm.nvvm.match.any.sync.i64(i32 3, i64 4) - %sum1 = add i64 %v0, %v1 - %sum2 = add i64 %v2, %v3 - %sum3 = add i64 %sum1, %sum2 - ret i64 %sum3; + ; CHECK: match.any.sync.b64 [[V0:%r[0-9]+]], [[VALUE]], [[MASK]]; + %v0 = call i32 @llvm.nvvm.match.any.sync.i64(i32 %mask, i64 %value) + ; CHECK: match.any.sync.b64 [[V1:%r[0-9]+]], [[VALUE]], 1; + %v1 = call i32 @llvm.nvvm.match.any.sync.i64(i32 1, i64 %value) + ; CHECK: match.any.sync.b64 [[V2:%r[0-9]+]], 2, [[MASK]]; + %v2 = call i32 @llvm.nvvm.match.any.sync.i64(i32 %mask, i64 2) + ; CHECK: match.any.sync.b64 [[V3:%r[0-9]+]], 4, 3; + %v3 = call i32 @llvm.nvvm.match.any.sync.i64(i32 3, i64 4) + %sum1 = add i32 %v0, %v1 + %sum2 = add i32 %v2, %v3 + %sum3 = add i32 %sum1, %sum2 + ret i32 %sum3; } declare {i32, i1} @llvm.nvvm.match.all.sync.i32p(i32, i32) -declare {i64, i1} @llvm.nvvm.match.all.sync.i64p(i32, i64) +declare {i32, i1} @llvm.nvvm.match.all.sync.i64p(i32, i64) ; CHECK-LABEL: .func{{.*}}match.all.sync.i32p( define {i32,i1} @match.all.sync.i32p(i32 %mask, i32 %value) { @@ -81,37 +81,37 @@ } ; CHECK-LABEL: .func{{.*}}match.all.sync.i64p( -define {i64,i1} @match.all.sync.i64p(i32 %mask, i64 %value) { +define {i32,i1} @match.all.sync.i64p(i32 %mask, i64 %value) { ; CHECK: ld.param.u32 [[MASK:%r[0-9]+]], [match.all.sync.i64p_param_0]; ; CHECK: ld.param.u64 [[VALUE:%rd[0-9]+]], [match.all.sync.i64p_param_1]; - ; CHECK: match.all.sync.b64 {{%rd[0-9]+\|%p[0-9]+}}, [[VALUE]], [[MASK]]; - %r1 = call {i64, i1} @llvm.nvvm.match.all.sync.i64p(i32 %mask, i64 %value) - %v1 = extractvalue {i64, i1} %r1, 0 - %p1 = extractvalue {i64, i1} %r1, 1 - - ; CHECK: match.all.sync.b64 {{%rd[0-9]+\|%p[0-9]+}}, 1, [[MASK]]; - %r2 = call {i64, i1} @llvm.nvvm.match.all.sync.i64p(i32 %mask, i64 1) - %v2 = extractvalue {i64, i1} %r2, 0 - %p2 = extractvalue {i64, i1} %r2, 1 - - ; CHECK: match.all.sync.b64 {{%rd[0-9]+\|%p[0-9]+}}, [[VALUE]], 2; - %r3 = call {i64, i1} @llvm.nvvm.match.all.sync.i64p(i32 2, i64 %value) - %v3 = extractvalue {i64, i1} %r3, 0 - %p3 = extractvalue {i64, i1} %r3, 1 - - ; CHECK: match.all.sync.b64 {{%rd[0-9]+\|%p[0-9]+}}, 4, 3; - %r4 = call {i64, i1} @llvm.nvvm.match.all.sync.i64p(i32 3, i64 4) - %v4 = extractvalue {i64, i1} %r4, 0 - %p4 = extractvalue {i64, i1} %r4, 1 - - %vsum1 = add i64 %v1, %v2 - %vsum2 = add i64 %v3, %v4 - %vsum3 = add i64 %vsum1, %vsum2 + ; CHECK: match.all.sync.b64 {{%r[0-9]+\|%p[0-9]+}}, [[VALUE]], [[MASK]]; + %r1 = call {i32, i1} @llvm.nvvm.match.all.sync.i64p(i32 %mask, i64 %value) + %v1 = extractvalue {i32, i1} %r1, 0 + %p1 = extractvalue {i32, i1} %r1, 1 + + ; CHECK: match.all.sync.b64 {{%r[0-9]+\|%p[0-9]+}}, 1, [[MASK]]; + %r2 = call {i32, i1} @llvm.nvvm.match.all.sync.i64p(i32 %mask, i64 1) + %v2 = extractvalue {i32, i1} %r2, 0 + %p2 = extractvalue {i32, i1} %r2, 1 + + ; CHECK: match.all.sync.b64 {{%r[0-9]+\|%p[0-9]+}}, [[VALUE]], 2; + %r3 = call {i32, i1} @llvm.nvvm.match.all.sync.i64p(i32 2, i64 %value) + %v3 = extractvalue {i32, i1} %r3, 0 + %p3 = extractvalue {i32, i1} %r3, 1 + + ; CHECK: match.all.sync.b64 {{%r[0-9]+\|%p[0-9]+}}, 4, 3; + %r4 = call {i32, i1} @llvm.nvvm.match.all.sync.i64p(i32 3, i64 4) + %v4 = extractvalue {i32, i1} %r4, 0 + %p4 = extractvalue {i32, i1} %r4, 1 + + %vsum1 = add i32 %v1, %v2 + %vsum2 = add i32 %v3, %v4 + %vsum3 = add i32 %vsum1, %vsum2 %psum1 = add i1 %p1, %p2 %psum2 = add i1 %p3, %p4 %psum3 = add i1 %psum1, %psum2 - %ret0 = insertvalue {i64, i1} undef, i64 %vsum3, 0 - %ret1 = insertvalue {i64, i1} %ret0, i1 %psum3, 1 - ret {i64, i1} %ret1; + %ret0 = insertvalue {i32, i1} undef, i32 %vsum3, 0 + %ret1 = insertvalue {i32, i1} %ret0, i1 %psum3, 1 + ret {i32, i1} %ret1; } Index: llvm/lib/Target/NVPTX/NVPTXIntrinsics.td =================================================================== --- llvm/lib/Target/NVPTX/NVPTXIntrinsics.td +++ llvm/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -223,21 +223,21 @@ multiclass MATCH_ANY_SYNC<NVPTXRegClass regclass, string ptxtype, Intrinsic IntOp, Operand ImmOp> { - def ii : NVPTXInst<(outs regclass:$dest), (ins i32imm:$mask, ImmOp:$value), + def ii : NVPTXInst<(outs Int32Regs:$dest), (ins i32imm:$mask, ImmOp:$value), "match.any.sync." # ptxtype # " \t$dest, $value, $mask;", - [(set regclass:$dest, (IntOp imm:$mask, imm:$value))]>, + [(set Int32Regs:$dest, (IntOp imm:$mask, imm:$value))]>, Requires<[hasPTX60, hasSM70]>; - def ir : NVPTXInst<(outs regclass:$dest), (ins Int32Regs:$mask, ImmOp:$value), + def ir : NVPTXInst<(outs Int32Regs:$dest), (ins Int32Regs:$mask, ImmOp:$value), "match.any.sync." # ptxtype # " \t$dest, $value, $mask;", - [(set regclass:$dest, (IntOp Int32Regs:$mask, imm:$value))]>, + [(set Int32Regs:$dest, (IntOp Int32Regs:$mask, imm:$value))]>, Requires<[hasPTX60, hasSM70]>; - def ri : NVPTXInst<(outs regclass:$dest), (ins i32imm:$mask, regclass:$value), + def ri : NVPTXInst<(outs Int32Regs:$dest), (ins i32imm:$mask, regclass:$value), "match.any.sync." # ptxtype # " \t$dest, $value, $mask;", - [(set regclass:$dest, (IntOp imm:$mask, regclass:$value))]>, + [(set Int32Regs:$dest, (IntOp imm:$mask, regclass:$value))]>, Requires<[hasPTX60, hasSM70]>; - def rr : NVPTXInst<(outs regclass:$dest), (ins Int32Regs:$mask, regclass:$value), + def rr : NVPTXInst<(outs Int32Regs:$dest), (ins Int32Regs:$mask, regclass:$value), "match.any.sync." # ptxtype # " \t$dest, $value, $mask;", - [(set regclass:$dest, (IntOp Int32Regs:$mask, regclass:$value))]>, + [(set Int32Regs:$dest, (IntOp Int32Regs:$mask, regclass:$value))]>, Requires<[hasPTX60, hasSM70]>; } @@ -248,25 +248,25 @@ multiclass MATCH_ALLP_SYNC<NVPTXRegClass regclass, string ptxtype, Intrinsic IntOp, Operand ImmOp> { - def ii : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred), + def ii : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred), (ins i32imm:$mask, ImmOp:$value), "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;", - [(set regclass:$dest, Int1Regs:$pred, (IntOp imm:$mask, imm:$value))]>, + [(set Int32Regs:$dest, Int1Regs:$pred, (IntOp imm:$mask, imm:$value))]>, Requires<[hasPTX60, hasSM70]>; - def ir : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred), + def ir : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred), (ins Int32Regs:$mask, ImmOp:$value), "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;", - [(set regclass:$dest, Int1Regs:$pred, (IntOp Int32Regs:$mask, imm:$value))]>, + [(set Int32Regs:$dest, Int1Regs:$pred, (IntOp Int32Regs:$mask, imm:$value))]>, Requires<[hasPTX60, hasSM70]>; - def ri : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred), + def ri : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred), (ins i32imm:$mask, regclass:$value), "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;", - [(set regclass:$dest, Int1Regs:$pred, (IntOp imm:$mask, regclass:$value))]>, + [(set Int32Regs:$dest, Int1Regs:$pred, (IntOp imm:$mask, regclass:$value))]>, Requires<[hasPTX60, hasSM70]>; - def rr : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred), + def rr : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred), (ins Int32Regs:$mask, regclass:$value), "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;", - [(set regclass:$dest, Int1Regs:$pred, (IntOp Int32Regs:$mask, regclass:$value))]>, + [(set Int32Regs:$dest, Int1Regs:$pred, (IntOp Int32Regs:$mask, regclass:$value))]>, Requires<[hasPTX60, hasSM70]>; } defm MATCH_ALLP_SYNC_32 : MATCH_ALLP_SYNC<Int32Regs, "b32", int_nvvm_match_all_sync_i32p, Index: llvm/include/llvm/IR/IntrinsicsNVVM.td =================================================================== --- llvm/include/llvm/IR/IntrinsicsNVVM.td +++ llvm/include/llvm/IR/IntrinsicsNVVM.td @@ -4499,7 +4499,7 @@ GCCBuiltin<"__nvvm_match_any_sync_i32">; // match.any.sync.b64 mask, value def int_nvvm_match_any_sync_i64 : - Intrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i64_ty], + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i64_ty], [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.match.any.sync.i64">, GCCBuiltin<"__nvvm_match_any_sync_i64">; @@ -4513,7 +4513,7 @@ [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.match.all.sync.i32p">; // match.all.sync.b64p mask, value def int_nvvm_match_all_sync_i64p : - Intrinsic<[llvm_i64_ty, llvm_i1_ty], [llvm_i32_ty, llvm_i64_ty], + Intrinsic<[llvm_i32_ty, llvm_i1_ty], [llvm_i32_ty, llvm_i64_ty], [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.match.all.sync.i64p">; // Index: clang/test/CodeGen/builtins-nvptx-ptx60.cu =================================================================== --- clang/test/CodeGen/builtins-nvptx-ptx60.cu +++ clang/test/CodeGen/builtins-nvptx-ptx60.cu @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -triple nvptx64-unknown-unknown -target-cpu sm_60 \ +// RUN: %clang_cc1 -triple nvptx64-unknown-unknown -target-cpu sm_70 \ // RUN: -fcuda-is-device -target-feature +ptx60 \ // RUN: -S -emit-llvm -o - -x cuda %s \ // RUN: | FileCheck -check-prefix=CHECK %s @@ -10,7 +10,7 @@ // RUN: -fcuda-is-device -target-feature +ptx70 \ // RUN: -S -emit-llvm -o - -x cuda %s \ // RUN: | FileCheck -check-prefix=CHECK %s -// RUN: %clang_cc1 -triple nvptx-unknown-unknown -target-cpu sm_60 \ +// RUN: %clang_cc1 -triple nvptx-unknown-unknown -target-cpu sm_70 \ // RUN: -fcuda-is-device -S -o /dev/null -x cuda -verify %s #define __device__ __attribute__((device)) @@ -89,16 +89,16 @@ // // CHECK: call i32 @llvm.nvvm.match.any.sync.i32(i32 - // expected-error@+1 {{'__nvvm_match_any_sync_i32' needs target feature ptx60}} + // expected-error-re@+1 {{'__nvvm_match_any_sync_i32' needs target feature (sm_70{{.*}}),(ptx60{{.*}})}} __nvvm_match_any_sync_i32(mask, i); - // CHECK: call i64 @llvm.nvvm.match.any.sync.i64(i32 - // expected-error@+1 {{'__nvvm_match_any_sync_i64' needs target feature ptx60}} + // CHECK: call i32 @llvm.nvvm.match.any.sync.i64(i32 + // expected-error-re@+1 {{'__nvvm_match_any_sync_i64' needs target feature (sm_70{{.*}}),(ptx60{{.*}})}} __nvvm_match_any_sync_i64(mask, i64); // CHECK: call { i32, i1 } @llvm.nvvm.match.all.sync.i32p(i32 - // expected-error@+1 {{'__nvvm_match_all_sync_i32p' needs target feature ptx60}} + // expected-error-re@+1 {{'__nvvm_match_all_sync_i32p' needs target feature (sm_70{{.*}}),(ptx60{{.*}})}} __nvvm_match_all_sync_i32p(mask, i, &i); - // CHECK: call { i64, i1 } @llvm.nvvm.match.all.sync.i64p(i32 - // expected-error@+1 {{'__nvvm_match_all_sync_i64p' needs target feature ptx60}} + // CHECK: call { i32, i1 } @llvm.nvvm.match.all.sync.i64p(i32 + // expected-error-re@+1 {{'__nvvm_match_all_sync_i64p' needs target feature (sm_70{{.*}}),(ptx60{{.*}})}} __nvvm_match_all_sync_i64p(mask, i64, &i); // CHECK: ret void Index: clang/lib/Headers/__clang_cuda_intrinsics.h =================================================================== --- clang/lib/Headers/__clang_cuda_intrinsics.h +++ clang/lib/Headers/__clang_cuda_intrinsics.h @@ -234,7 +234,7 @@ return __nvvm_match_any_sync_i32(mask, value); } -inline __device__ unsigned long long +inline __device__ unsigned int __match64_any_sync(unsigned int mask, unsigned long long value) { return __nvvm_match_any_sync_i64(mask, value); } @@ -244,7 +244,7 @@ return __nvvm_match_all_sync_i32p(mask, value, pred); } -inline __device__ unsigned long long +inline __device__ unsigned int __match64_all_sync(unsigned int mask, unsigned long long value, int *pred) { return __nvvm_match_all_sync_i64p(mask, value, pred); } Index: clang/include/clang/Basic/BuiltinsNVPTX.def =================================================================== --- clang/include/clang/Basic/BuiltinsNVPTX.def +++ clang/include/clang/Basic/BuiltinsNVPTX.def @@ -473,11 +473,11 @@ TARGET_BUILTIN(__nvvm_vote_ballot_sync, "UiUib", "", PTX60) // Match -TARGET_BUILTIN(__nvvm_match_any_sync_i32, "UiUiUi", "", PTX60) -TARGET_BUILTIN(__nvvm_match_any_sync_i64, "WiUiWi", "", PTX60) +TARGET_BUILTIN(__nvvm_match_any_sync_i32, "UiUiUi", "", AND(SM_70,PTX60)) +TARGET_BUILTIN(__nvvm_match_any_sync_i64, "UiUiWi", "", AND(SM_70,PTX60)) // These return a pair {value, predicate}, which requires custom lowering. -TARGET_BUILTIN(__nvvm_match_all_sync_i32p, "UiUiUii*", "", PTX60) -TARGET_BUILTIN(__nvvm_match_all_sync_i64p, "WiUiWii*", "", PTX60) +TARGET_BUILTIN(__nvvm_match_all_sync_i32p, "UiUiUii*", "", AND(SM_70,PTX60)) +TARGET_BUILTIN(__nvvm_match_all_sync_i64p, "UiUiWii*", "", AND(SM_70,PTX60)) // Redux TARGET_BUILTIN(__nvvm_redux_sync_add, "iii", "", AND(SM_80,PTX70))
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits