https://github.com/Wolfram70 updated https://github.com/llvm/llvm-project/pull/167836
>From 825816b9270bb9d05aac05a56ad84c86c58b830f Mon Sep 17 00:00:00 2001 From: Srinivasa Ravi <[email protected]> Date: Wed, 12 Nov 2025 06:05:55 +0000 Subject: [PATCH 1/2] [clang][NVPTX] Fix SM requirement of f32-tf32 rna satfinite conversion This change fixes the SM requirement of the f32 to tf32 conversion with `rna` rounding mode and `.satfinite` modifier. The current requirement specified is `sm_89` but this conversion is supported from `sm_80` onwards after it was added in PTX 8.1. --- clang/include/clang/Basic/BuiltinsNVPTX.td | 2 +- clang/test/CodeGen/builtins-nvptx.c | 10 +++++++--- llvm/lib/Target/NVPTX/NVPTXInstrInfo.td | 2 +- llvm/test/CodeGen/NVPTX/convert-sm80-sf.ll | 18 ++++++++++++++++++ llvm/test/CodeGen/NVPTX/convert-sm89.ll | 7 ------- 5 files changed, 27 insertions(+), 12 deletions(-) create mode 100644 llvm/test/CodeGen/NVPTX/convert-sm80-sf.ll diff --git a/clang/include/clang/Basic/BuiltinsNVPTX.td b/clang/include/clang/Basic/BuiltinsNVPTX.td index d923d2a90e908..ad448766e665f 100644 --- a/clang/include/clang/Basic/BuiltinsNVPTX.td +++ b/clang/include/clang/Basic/BuiltinsNVPTX.td @@ -615,7 +615,7 @@ def __nvvm_f2bf16_rz : NVPTXBuiltinSMAndPTX<"__bf16(float)", SM_80, PTX70>; def __nvvm_f2bf16_rz_relu : NVPTXBuiltinSMAndPTX<"__bf16(float)", SM_80, PTX70>; def __nvvm_f2tf32_rna : NVPTXBuiltinSMAndPTX<"int32_t(float)", SM_80, PTX70>; -def __nvvm_f2tf32_rna_satfinite : NVPTXBuiltinSMAndPTX<"int32_t(float)", SM_89, PTX81>; +def __nvvm_f2tf32_rna_satfinite : NVPTXBuiltinSMAndPTX<"int32_t(float)", SM_80, PTX81>; def __nvvm_f2tf32_rn : NVPTXBuiltinSMAndPTX<"int32_t(float)", SM_90, PTX78>; def __nvvm_f2tf32_rn_relu : NVPTXBuiltinSMAndPTX<"int32_t(float)", SM_90, PTX78>; def __nvvm_f2tf32_rn_satfinite : NVPTXBuiltinSMAndPTX<"int32_t(float)", SM_100, PTX86>; diff --git a/clang/test/CodeGen/builtins-nvptx.c b/clang/test/CodeGen/builtins-nvptx.c index e3be262622844..ca3cf06966b1d 100644 --- a/clang/test/CodeGen/builtins-nvptx.c +++ b/clang/test/CodeGen/builtins-nvptx.c @@ -28,6 +28,9 @@ // RUN: %clang_cc1 -ffp-contract=off -triple nvptx64-unknown-unknown -target-cpu sm_89 -target-feature +ptx81 -DPTX=81\ // RUN: -disable-llvm-optzns -fcuda-is-device -emit-llvm -o - -x cuda %s \ // RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK_PTX81_SM89 %s +// RUN: %clang_cc1 -ffp-contract=off -triple nvptx64-unknown-unknown -target-cpu sm_80 -target-feature +ptx81 -DPTX=81 \ +// RUN: -disable-llvm-optzns -fcuda-is-device -emit-llvm -o - -x cuda %s \ +// RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK_PTX81_SM80 %s // RUN: %clang_cc1 -ffp-contract=off -triple nvptx64-unknown-unknown -target-cpu sm_90 -target-feature +ptx78 -DPTX=78 \ // RUN: -disable-llvm-optzns -fcuda-is-device -emit-llvm -o - -x cuda %s \ // RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK_PTX78_SM90 %s @@ -1025,6 +1028,10 @@ __device__ void nvvm_cvt_sm80() { // CHECK_PTX70_SM80: call i32 @llvm.nvvm.f2tf32.rna(float 1.000000e+00) __nvvm_f2tf32_rna(1); + #if PTX >= 81 + // CHECK_PTX81_SM80: call i32 @llvm.nvvm.f2tf32.rna.satfinite(float 1.000000e+00) + __nvvm_f2tf32_rna_satfinite(1.0f); + #endif #endif // CHECK: ret void } @@ -1058,9 +1065,6 @@ __device__ void nvvm_cvt_sm89() { __nvvm_e5m2x2_to_f16x2_rn(0x4c4c); // CHECK_PTX81_SM89: call <2 x half> @llvm.nvvm.e5m2x2.to.f16x2.rn.relu(i16 19532) __nvvm_e5m2x2_to_f16x2_rn_relu(0x4c4c); - - // CHECK_PTX81_SM89: call i32 @llvm.nvvm.f2tf32.rna.satfinite(float 1.000000e+00) - __nvvm_f2tf32_rna_satfinite(1.0f); #endif // CHECK: ret void } diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td index dfde0cca0f00c..f49a98e2b4c84 100644 --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -684,7 +684,7 @@ let hasSideEffects = false in { defm CVT_to_tf32_rn_relu : CVT_TO_TF32<"rn.relu">; defm CVT_to_tf32_rz_relu : CVT_TO_TF32<"rz.relu">; defm CVT_to_tf32_rna : CVT_TO_TF32<"rna", [hasPTX<70>, hasSM<80>]>; - defm CVT_to_tf32_rna_satf : CVT_TO_TF32<"rna.satfinite", [hasPTX<81>, hasSM<89>]>; + defm CVT_to_tf32_rna_satf : CVT_TO_TF32<"rna.satfinite", [hasPTX<81>, hasSM<80>]>; defm CVT_to_tf32_rn_satf : CVT_TO_TF32<"rn.satfinite", [hasPTX<86>, hasSM<100>]>; defm CVT_to_tf32_rz_satf : CVT_TO_TF32<"rz.satfinite", [hasPTX<86>, hasSM<100>]>; diff --git a/llvm/test/CodeGen/NVPTX/convert-sm80-sf.ll b/llvm/test/CodeGen/NVPTX/convert-sm80-sf.ll new file mode 100644 index 0000000000000..f47c2f2a85156 --- /dev/null +++ b/llvm/test/CodeGen/NVPTX/convert-sm80-sf.ll @@ -0,0 +1,18 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_80 -mattr=+ptx81 | FileCheck %s +; RUN: %if ptxas-sm_80 && ptxas-isa-8.1 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_80 -mattr=+ptx81 | %ptxas-verify -arch=sm_80 %} + +; CHECK-LABEL: cvt_rna_satfinite_tf32_f32 +define i32 @cvt_rna_satfinite_tf32_f32(float %f1) { +; CHECK-LABEL: cvt_rna_satfinite_tf32_f32( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [cvt_rna_satfinite_tf32_f32_param_0]; +; CHECK-NEXT: cvt.rna.satfinite.tf32.f32 %r2, %r1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r2; +; CHECK-NEXT: ret; + %val = call i32 @llvm.nvvm.f2tf32.rna.satfinite(float %f1) + ret i32 %val +} diff --git a/llvm/test/CodeGen/NVPTX/convert-sm89.ll b/llvm/test/CodeGen/NVPTX/convert-sm89.ll index 616dcfa330e81..170c120162cc3 100644 --- a/llvm/test/CodeGen/NVPTX/convert-sm89.ll +++ b/llvm/test/CodeGen/NVPTX/convert-sm89.ll @@ -84,10 +84,3 @@ define <2 x half> @cvt_rn_relu_f16x2_e5m2x2(i16 %in) { %val = call <2 x half> @llvm.nvvm.e5m2x2.to.f16x2.rn.relu(i16 %in); ret <2 x half> %val } - -; CHECK-LABEL: cvt_rna_satfinite_tf32_f32 -define i32 @cvt_rna_satfinite_tf32_f32(float %f1) { -; CHECK: cvt.rna.satfinite.tf32.f32 - %val = call i32 @llvm.nvvm.f2tf32.rna.satfinite(float %f1) - ret i32 %val -} >From 1c55c11edcd1dc1d56010489be9aff95638a14e8 Mon Sep 17 00:00:00 2001 From: Srinivasa Ravi <[email protected]> Date: Mon, 17 Nov 2025 06:54:53 +0000 Subject: [PATCH 2/2] fix alignment --- clang/test/CodeGen/builtins-nvptx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/test/CodeGen/builtins-nvptx.c b/clang/test/CodeGen/builtins-nvptx.c index ca3cf06966b1d..c0ed799970122 100644 --- a/clang/test/CodeGen/builtins-nvptx.c +++ b/clang/test/CodeGen/builtins-nvptx.c @@ -1031,7 +1031,7 @@ __device__ void nvvm_cvt_sm80() { #if PTX >= 81 // CHECK_PTX81_SM80: call i32 @llvm.nvvm.f2tf32.rna.satfinite(float 1.000000e+00) __nvvm_f2tf32_rna_satfinite(1.0f); - #endif + #endif #endif // CHECK: ret void } _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
