simon_tatham created this revision. simon_tatham added reviewers: dmgreen, miyuki, MarkMurrayARM, ostannard. Herald added subscribers: llvm-commits, cfe-commits, hiraditya, kristof.beyls. Herald added projects: clang, LLVM.
This adds the unpredicated forms of six MVE intrinsics, each of which rounds a vector of floating-point numbers to integer values while keeping the results in floating-point format; they differ only in rounding mode and exception behavior. Five of them map to existing target-independent intrinsics in LLVM IR, such as @llvm.trunc and @llvm.rint. The sixth, which maps to the `vrintn` instruction, is implemented by adding a new target-specific intrinsic. (`vrintn` behaves the same as `vrintx` in terms of the output value: the side effects on the FPSCR flags are the only difference between the two. But ACLE specifies separate user-callable intrinsics for the two, so the side effects matter enough that we must generate the right one of the two instructions in each case.) Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D74333 Files: clang/include/clang/Basic/arm_mve.td clang/test/CodeGen/arm-mve-intrinsics/vrnd.c llvm/include/llvm/IR/IntrinsicsARM.td llvm/lib/Target/ARM/ARMInstrMVE.td llvm/test/CodeGen/Thumb2/mve-intrinsics/vrintn.ll
Index: llvm/test/CodeGen/Thumb2/mve-intrinsics/vrintn.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/Thumb2/mve-intrinsics/vrintn.ll @@ -0,0 +1,25 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s + +define arm_aapcs_vfpcc <8 x half> @test_vrndnq_f16(<8 x half> %a) { +; CHECK-LABEL: test_vrndnq_f16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vrintn.f16 q0, q0 +; CHECK-NEXT: bx lr +entry: + %0 = tail call <8 x half> @llvm.arm.mve.vrintn.v8f16(<8 x half> %a) + ret <8 x half> %0 +} + +define arm_aapcs_vfpcc <4 x float> @test_vrndnq_f32(<4 x float> %a) { +; CHECK-LABEL: test_vrndnq_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vrintn.f32 q0, q0 +; CHECK-NEXT: bx lr +entry: + %0 = tail call <4 x float> @llvm.arm.mve.vrintn.v4f32(<4 x float> %a) + ret <4 x float> %0 +} + +declare <8 x half> @llvm.arm.mve.vrintn.v8f16(<8 x half>) +declare <4 x float> @llvm.arm.mve.vrintn.v4f32(<4 x float>) Index: llvm/lib/Target/ARM/ARMInstrMVE.td =================================================================== --- llvm/lib/Target/ARM/ARMInstrMVE.td +++ llvm/lib/Target/ARM/ARMInstrMVE.td @@ -3179,6 +3179,10 @@ (v4f32 (MVE_VRINTf32P (v4f32 MQPR:$val1)))>; def : Pat<(v8f16 (fceil (v8f16 MQPR:$val1))), (v8f16 (MVE_VRINTf16P (v8f16 MQPR:$val1)))>; + def : Pat<(v4f32 (int_arm_mve_vrintn (v4f32 MQPR:$val1))), + (v4f32 (MVE_VRINTf32N (v4f32 MQPR:$val1)))>; + def : Pat<(v8f16 (int_arm_mve_vrintn (v8f16 MQPR:$val1))), + (v8f16 (MVE_VRINTf16N (v8f16 MQPR:$val1)))>; } class MVEFloatArithNeon<string iname, string suffix, bit size, Index: llvm/include/llvm/IR/IntrinsicsARM.td =================================================================== --- llvm/include/llvm/IR/IntrinsicsARM.td +++ llvm/include/llvm/IR/IntrinsicsARM.td @@ -1158,4 +1158,8 @@ [llvm_anyvector_ty /* output */], [llvm_i32_ty], 
[llvm_anyvector_ty /* input vector */, llvm_i32_ty /* scale */], LLVMMatchType<0>, llvm_anyvector_ty>; + +def int_arm_mve_vrintn: Intrinsic< + [llvm_anyvector_ty], [LLVMMatchType<0>], [IntrNoMem]>; + } // end TargetPrefix Index: clang/test/CodeGen/arm-mve-intrinsics/vrnd.c =================================================================== --- /dev/null +++ clang/test/CodeGen/arm-mve-intrinsics/vrnd.c @@ -0,0 +1,173 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg -sroa -early-cse | FileCheck %s + +#include <arm_mve.h> + +// CHECK-LABEL: @test_vrndaq_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.round.v8f16(<8 x half> [[A:%.*]]) +// CHECK-NEXT: ret <8 x half> [[TMP0]] +// +float16x8_t test_vrndaq_f16(float16x8_t a) +{ +#ifdef POLYMORPHIC + return vrndaq(a); +#else /* POLYMORPHIC */ + return vrndaq_f16(a); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrndaq_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.round.v4f32(<4 x float> [[A:%.*]]) +// CHECK-NEXT: ret <4 x float> [[TMP0]] +// +float32x4_t test_vrndaq_f32(float32x4_t a) +{ +#ifdef POLYMORPHIC + return vrndaq(a); +#else /* POLYMORPHIC */ + return vrndaq_f32(a); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrndmq_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.floor.v8f16(<8 x half> [[A:%.*]]) +// CHECK-NEXT: ret <8 x half> [[TMP0]] +// +float16x8_t test_vrndmq_f16(float16x8_t a) +{ +#ifdef POLYMORPHIC + return vrndmq(a); +#else /* POLYMORPHIC 
*/ + return vrndmq_f16(a); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrndmq_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.floor.v4f32(<4 x float> [[A:%.*]]) +// CHECK-NEXT: ret <4 x float> [[TMP0]] +// +float32x4_t test_vrndmq_f32(float32x4_t a) +{ +#ifdef POLYMORPHIC + return vrndmq(a); +#else /* POLYMORPHIC */ + return vrndmq_f32(a); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrndpq_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.ceil.v8f16(<8 x half> [[A:%.*]]) +// CHECK-NEXT: ret <8 x half> [[TMP0]] +// +float16x8_t test_vrndpq_f16(float16x8_t a) +{ +#ifdef POLYMORPHIC + return vrndpq(a); +#else /* POLYMORPHIC */ + return vrndpq_f16(a); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrndpq_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.ceil.v4f32(<4 x float> [[A:%.*]]) +// CHECK-NEXT: ret <4 x float> [[TMP0]] +// +float32x4_t test_vrndpq_f32(float32x4_t a) +{ +#ifdef POLYMORPHIC + return vrndpq(a); +#else /* POLYMORPHIC */ + return vrndpq_f32(a); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrndq_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.trunc.v8f16(<8 x half> [[A:%.*]]) +// CHECK-NEXT: ret <8 x half> [[TMP0]] +// +float16x8_t test_vrndq_f16(float16x8_t a) +{ +#ifdef POLYMORPHIC + return vrndq(a); +#else /* POLYMORPHIC */ + return vrndq_f16(a); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrndq_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.trunc.v4f32(<4 x float> [[A:%.*]]) +// CHECK-NEXT: ret <4 x float> [[TMP0]] +// +float32x4_t test_vrndq_f32(float32x4_t a) +{ +#ifdef POLYMORPHIC + return vrndq(a); +#else /* POLYMORPHIC */ + return vrndq_f32(a); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrndxq_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.rint.v8f16(<8 x half> [[A:%.*]]) +// CHECK-NEXT: 
ret <8 x half> [[TMP0]] +// +float16x8_t test_vrndxq_f16(float16x8_t a) +{ +#ifdef POLYMORPHIC + return vrndxq(a); +#else /* POLYMORPHIC */ + return vrndxq_f16(a); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrndxq_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.rint.v4f32(<4 x float> [[A:%.*]]) +// CHECK-NEXT: ret <4 x float> [[TMP0]] +// +float32x4_t test_vrndxq_f32(float32x4_t a) +{ +#ifdef POLYMORPHIC + return vrndxq(a); +#else /* POLYMORPHIC */ + return vrndxq_f32(a); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrndnq_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.arm.mve.vrintn.v8f16(<8 x half> [[A:%.*]]) +// CHECK-NEXT: ret <8 x half> [[TMP0]] +// +float16x8_t test_vrndnq_f16(float16x8_t a) +{ +#ifdef POLYMORPHIC + return vrndnq(a); +#else /* POLYMORPHIC */ + return vrndnq_f16(a); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrndnq_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.arm.mve.vrintn.v4f32(<4 x float> [[A:%.*]]) +// CHECK-NEXT: ret <4 x float> [[TMP0]] +// +float32x4_t test_vrndnq_f32(float32x4_t a) +{ +#ifdef POLYMORPHIC + return vrndnq(a); +#else /* POLYMORPHIC */ + return vrndnq_f32(a); +#endif /* POLYMORPHIC */ +} Index: clang/include/clang/Basic/arm_mve.td =================================================================== --- clang/include/clang/Basic/arm_mve.td +++ clang/include/clang/Basic/arm_mve.td @@ -349,6 +349,21 @@ defm : float_int_conversions<f32, s32, fptosi, sitofp>; defm : float_int_conversions<f16, s16, fptosi, sitofp>; +let params = T.Float in { + def vrndq: Intrinsic<Vector, (args Vector:$a), + (IRIntBase<"trunc", [Vector]> $a)>; + def vrndmq: Intrinsic<Vector, (args Vector:$a), + (IRIntBase<"floor", [Vector]> $a)>; + def vrndpq: Intrinsic<Vector, (args Vector:$a), + (IRIntBase<"ceil", [Vector]> $a)>; + def vrndaq: Intrinsic<Vector, (args Vector:$a), + (IRIntBase<"round", [Vector]> $a)>; + def 
vrndxq: Intrinsic<Vector, (args Vector:$a), + (IRIntBase<"rint", [Vector]> $a)>; + def vrndnq: Intrinsic<Vector, (args Vector:$a), + (IRInt<"vrintn", [Vector]> $a)>; +} + multiclass compare_with_pred<string condname, dag arguments, dag cmp, string suffix> { // Make the predicated and unpredicated versions of a single comparison.
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits