Author: David Green
Date: 2025-07-04T15:27:33+01:00
New Revision: 9fcea2e4656acbf6025e73da641e619ff12ee3d0
URL: https://github.com/llvm/llvm-project/commit/9fcea2e4656acbf6025e73da641e619ff12ee3d0
DIFF: https://github.com/llvm/llvm-project/commit/9fcea2e4656acbf6025e73da641e619ff12ee3d0.diff

LOG: [ARM] Add neon vector support for roundeven

As per #142559, this marks froundeven as legal for Neon and upgrades the
existing arm.neon.vrintn intrinsics.

Added: 

Modified: 
    clang/lib/CodeGen/TargetBuiltins/ARM.cpp
    clang/test/CodeGen/arm-neon-directed-rounding.c
    clang/test/CodeGen/arm-v8.2a-neon-intrinsics.c
    llvm/include/llvm/IR/IntrinsicsARM.td
    llvm/lib/IR/AutoUpgrade.cpp
    llvm/lib/Target/ARM/ARMISelLowering.cpp
    llvm/lib/Target/ARM/ARMInstrNEON.td
    llvm/lib/Target/ARM/ARMInstrVFP.td
    llvm/test/CodeGen/ARM/vrint.ll
    llvm/test/CodeGen/ARM/vrintn.ll

Removed: 


################################################################################
diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
index fcfb92d65958e..7e6a47fd7c103 100644
--- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
@@ -845,8 +845,8 @@ static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap[] = {
   NEONMAP0(vrndiq_v),
   NEONMAP1(vrndm_v, floor, Add1ArgType),
   NEONMAP1(vrndmq_v, floor, Add1ArgType),
-  NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType),
-  NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType),
+  NEONMAP1(vrndn_v, roundeven, Add1ArgType),
+  NEONMAP1(vrndnq_v, roundeven, Add1ArgType),
   NEONMAP1(vrndp_v, ceil, Add1ArgType),
   NEONMAP1(vrndpq_v, ceil, Add1ArgType),
   NEONMAP1(vrndq_v, trunc, Add1ArgType),
@@ -3132,7 +3132,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
   case NEON::BI__builtin_neon_vrndns_f32: {
     Value *Arg = EmitScalarExpr(E->getArg(0));
     llvm::Type *Tys[] = {Arg->getType()};
-    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vrintn, Tys);
+    Function *F = CGM.getIntrinsic(Intrinsic::roundeven, Tys);
     return Builder.CreateCall(F, {Arg}, "vrndn");
   }
   case NEON::BI__builtin_neon_vset_lane_i8:

diff --git a/clang/test/CodeGen/arm-neon-directed-rounding.c b/clang/test/CodeGen/arm-neon-directed-rounding.c
index 47fa6ade44830..08667314e37ce 100644
--- a/clang/test/CodeGen/arm-neon-directed-rounding.c
+++ b/clang/test/CodeGen/arm-neon-directed-rounding.c
@@ -116,7 +116,7 @@ float32x4_t test_vrndmq_f32(float32x4_t a) {
 // CHECK-A32-NEXT:    [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
 // CHECK-A32-NEXT:    [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
 // CHECK-A32-NEXT:    [[VRNDN_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
-// CHECK-A32-NEXT:    [[VRNDN_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrintn.v2f32(<2 x float> [[VRNDN_V_I]])
+// CHECK-A32-NEXT:    [[VRNDN_V1_I:%.*]] = call <2 x float> @llvm.roundeven.v2f32(<2 x float> [[VRNDN_V_I]])
 // CHECK-A32-NEXT:    [[VRNDN_V2_I:%.*]] = bitcast <2 x float> [[VRNDN_V1_I]] to <8 x i8>
 // CHECK-A32-NEXT:    [[TMP2:%.*]] = bitcast <8 x i8> [[VRNDN_V2_I]] to <2 x i32>
 // CHECK-A32-NEXT:    [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to <2 x float>
@@ -141,7 +141,7 @@ float32x2_t test_vrndn_f32(float32x2_t a) {
 // CHECK-A32-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
 // CHECK-A32-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
 // CHECK-A32-NEXT:    [[VRNDNQ_V_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
-// CHECK-A32-NEXT:    [[VRNDNQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrintn.v4f32(<4 x float> [[VRNDNQ_V_I]])
+// CHECK-A32-NEXT:    [[VRNDNQ_V1_I:%.*]] = call <4 x float> @llvm.roundeven.v4f32(<4 x float> [[VRNDNQ_V_I]])
 // CHECK-A32-NEXT:    [[VRNDNQ_V2_I:%.*]] = bitcast <4 x float> [[VRNDNQ_V1_I]] to <16 x i8>
 // CHECK-A32-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[VRNDNQ_V2_I]] to <4 x i32>
 // CHECK-A32-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to <4 x float>
@@ -310,24 +310,18 @@ float32x4_t test_vrndq_f32(float32x4_t a) {
   return vrndq_f32(a);
 }
 
-// CHECK-A32-LABEL: define dso_local float @test_vrndns_f32(
-// CHECK-A32-SAME: float noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-A32-NEXT:  [[ENTRY:.*:]]
-// CHECK-A32-NEXT:    [[VRNDN_I:%.*]] = call float @llvm.arm.neon.vrintn.f32(float [[A]])
-// CHECK-A32-NEXT:    ret float [[VRNDN_I]]
-//
-// CHECK-A64-LABEL: define dso_local float @test_vrndns_f32(
-// CHECK-A64-SAME: float noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-A64-NEXT:  [[ENTRY:.*:]]
-// CHECK-A64-NEXT:    [[VRNDN_I:%.*]] = call float @llvm.roundeven.f32(float [[A]])
-// CHECK-A64-NEXT:    ret float [[VRNDN_I]]
+// CHECK-LABEL: define dso_local float @test_vrndns_f32(
+// CHECK-SAME: float noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[VRNDN_I:%.*]] = call float @llvm.roundeven.f32(float [[A]])
+// CHECK-NEXT:    ret float [[VRNDN_I]]
 //
 float32_t test_vrndns_f32(float32_t a) {
   return vrndns_f32(a);
 }
 
 // CHECK-LABEL: define dso_local <2 x float> @test_vrndi_f32(
-// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>

diff --git a/clang/test/CodeGen/arm-v8.2a-neon-intrinsics.c b/clang/test/CodeGen/arm-v8.2a-neon-intrinsics.c
index c55bb9ca0d78c..17d4eef1f7631 100644
--- a/clang/test/CodeGen/arm-v8.2a-neon-intrinsics.c
+++ b/clang/test/CodeGen/arm-v8.2a-neon-intrinsics.c
@@ -618,7 +618,7 @@ float16x8_t test_vrndmq_f16(float16x8_t a) {
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
 // CHECK-NEXT:    [[VRNDN_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
-// CHECK-NEXT:    [[VRNDN_V1_I:%.*]] = call <4 x half> @llvm.arm.neon.vrintn.v4f16(<4 x half> [[VRNDN_V_I]])
+// CHECK-NEXT:    [[VRNDN_V1_I:%.*]] = call <4 x half> @llvm.roundeven.v4f16(<4 x half> [[VRNDN_V_I]])
 // CHECK-NEXT:    [[VRNDN_V2_I:%.*]] = bitcast <4 x half> [[VRNDN_V1_I]] to <8 x i8>
 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i8> [[VRNDN_V2_I]] to <4 x i16>
 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to <4 x half>
@@ -634,7 +634,7 @@ float16x4_t test_vrndn_f16(float16x4_t a) {
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
 // CHECK-NEXT:    [[VRNDNQ_V_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
-// CHECK-NEXT:    [[VRNDNQ_V1_I:%.*]] = call <8 x half> @llvm.arm.neon.vrintn.v8f16(<8 x half> [[VRNDNQ_V_I]])
+// CHECK-NEXT:    [[VRNDNQ_V1_I:%.*]] = call <8 x half> @llvm.roundeven.v8f16(<8 x half> [[VRNDNQ_V_I]])
 // CHECK-NEXT:    [[VRNDNQ_V2_I:%.*]] = bitcast <8 x half> [[VRNDNQ_V1_I]] to <16 x i8>
 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[VRNDNQ_V2_I]] to <8 x i16>
 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <8 x half>

diff --git a/llvm/include/llvm/IR/IntrinsicsARM.td b/llvm/include/llvm/IR/IntrinsicsARM.td
index c7929e78a5fda..1219ce2f86da8 100644
--- a/llvm/include/llvm/IR/IntrinsicsARM.td
+++ b/llvm/include/llvm/IR/IntrinsicsARM.td
@@ -451,9 +451,6 @@ class Neon_3Arg_Long_Intrinsic
                            LLVMTruncatedType<0>],
                           [IntrNoMem]>;
-class Neon_1FloatArg_Intrinsic
-  : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
-
 class Neon_CvtFxToFP_Intrinsic
   : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty, llvm_i32_ty],
                           [IntrNoMem]>;
@@ -677,9 +674,6 @@ def int_arm_neon_vtbx2 : Neon_Tbl4Arg_Intrinsic;
 def int_arm_neon_vtbx3 : Neon_Tbl5Arg_Intrinsic;
 def int_arm_neon_vtbx4 : Neon_Tbl6Arg_Intrinsic;
 
-// Vector and Scalar Rounding.
-def int_arm_neon_vrintn : Neon_1FloatArg_Intrinsic;
-
 // De-interleaving vector loads from N-element structures.
 // Source operands are the address and alignment.
 def int_arm_neon_vld1 : DefaultAttrsIntrinsic<

diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index 1758b63a76dcd..86285a03c66bb 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -720,6 +720,7 @@ static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F,
             .StartsWith("vqsubs.", Intrinsic::ssub_sat)
             .StartsWith("vqsubu.", Intrinsic::usub_sat)
             .StartsWith("vrinta.", Intrinsic::round)
+            .StartsWith("vrintn.", Intrinsic::roundeven)
             .StartsWith("vrintm.", Intrinsic::floor)
             .StartsWith("vrintp.", Intrinsic::ceil)
             .StartsWith("vrintx.", Intrinsic::rint)

diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index d107fb3884079..b073c8651dcdb 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -1548,6 +1548,8 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
     setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
     setOperationAction(ISD::FROUND, MVT::v2f32, Legal);
     setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
+    setOperationAction(ISD::FROUNDEVEN, MVT::v2f32, Legal);
+    setOperationAction(ISD::FROUNDEVEN, MVT::v4f32, Legal);
     setOperationAction(ISD::FCEIL, MVT::v2f32, Legal);
     setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
     setOperationAction(ISD::FTRUNC, MVT::v2f32, Legal);
@@ -1571,6 +1573,8 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
     setOperationAction(ISD::FFLOOR, MVT::v8f16, Legal);
     setOperationAction(ISD::FROUND, MVT::v4f16, Legal);
     setOperationAction(ISD::FROUND, MVT::v8f16, Legal);
+    setOperationAction(ISD::FROUNDEVEN, MVT::v4f16, Legal);
+    setOperationAction(ISD::FROUNDEVEN, MVT::v8f16, Legal);
     setOperationAction(ISD::FCEIL, MVT::v4f16, Legal);
     setOperationAction(ISD::FCEIL, MVT::v8f16, Legal);
     setOperationAction(ISD::FTRUNC, MVT::v4f16, Legal);

diff --git a/llvm/lib/Target/ARM/ARMInstrNEON.td b/llvm/lib/Target/ARM/ARMInstrNEON.td
index d7324014ab4ba..7485ef569445a 100644
--- a/llvm/lib/Target/ARM/ARMInstrNEON.td
+++ b/llvm/lib/Target/ARM/ARMInstrNEON.td
@@ -7312,7 +7312,7 @@ multiclass VRINT_FPI<string op, bits<3> op9_7, SDPatternOperator Int> {
   }
 }
 
-defm VRINTNN : VRINT_FPI<"n", 0b000, int_arm_neon_vrintn>;
+defm VRINTNN : VRINT_FPI<"n", 0b000, froundeven>;
 defm VRINTXN : VRINT_FPI<"x", 0b001, frint>;
 defm VRINTAN : VRINT_FPI<"a", 0b010, fround>;
 defm VRINTZN : VRINT_FPI<"z", 0b011, ftrunc>;

diff --git a/llvm/lib/Target/ARM/ARMInstrVFP.td b/llvm/lib/Target/ARM/ARMInstrVFP.td
index 1d5c12fabf093..31650e0137beb 100644
--- a/llvm/lib/Target/ARM/ARMInstrVFP.td
+++ b/llvm/lib/Target/ARM/ARMInstrVFP.td
@@ -1135,13 +1135,8 @@ multiclass vrint_inst_anpm<string opc, bits<2> rm,
            Requires<[HasFPARMv8,HasDPVFP]>;
 }
 
-// Match either froundeven or int_arm_neon_vrintn
-def vrintn_or_froundeven : PatFrags<(ops node:$src),
-                                    [(int_arm_neon_vrintn node:$src),
-                                     (froundeven node:$src)]>;
-
 defm VRINTA : vrint_inst_anpm<"a", 0b00, fround>;
-defm VRINTN : vrint_inst_anpm<"n", 0b01, vrintn_or_froundeven>;
+defm VRINTN : vrint_inst_anpm<"n", 0b01, froundeven>;
 defm VRINTP : vrint_inst_anpm<"p", 0b10, fceil>;
 defm VRINTM : vrint_inst_anpm<"m", 0b11, ffloor>;

diff --git a/llvm/test/CodeGen/ARM/vrint.ll b/llvm/test/CodeGen/ARM/vrint.ll
index 1766af1486b9c..52107aac65187 100644
--- a/llvm/test/CodeGen/ARM/vrint.ll
+++ b/llvm/test/CodeGen/ARM/vrint.ll
@@ -1084,21 +1084,7 @@ define <4 x half> @frintn_4h(<4 x half> %A) nounwind {
 ;
 ; CHECK-FP16-LABEL: frintn_4h:
 ; CHECK-FP16:       @ %bb.0:
-; CHECK-FP16-NEXT:    vmovx.f16 s2, s0
-; CHECK-FP16-NEXT:    vrintn.f16 s2, s2
-; CHECK-FP16-NEXT:    vmov r0, s2
-; CHECK-FP16-NEXT:    vrintn.f16 s2, s0
-; CHECK-FP16-NEXT:    vmov r1, s2
-; CHECK-FP16-NEXT:    vrintn.f16 s2, s1
-; CHECK-FP16-NEXT:    vmovx.f16 s0, s1
-; CHECK-FP16-NEXT:    vrintn.f16 s0, s0
-; CHECK-FP16-NEXT:    vmov.16 d16[0], r1
-; CHECK-FP16-NEXT:    vmov.16 d16[1], r0
-; CHECK-FP16-NEXT:    vmov r0, s2
-; CHECK-FP16-NEXT:    vmov.16 d16[2], r0
-; CHECK-FP16-NEXT:    vmov r0, s0
-; CHECK-FP16-NEXT:    vmov.16 d16[3], r0
-; CHECK-FP16-NEXT:    vorr d0, d16, d16
+; CHECK-FP16-NEXT:    vrintn.f16 d0, d0
 ; CHECK-FP16-NEXT:    bx lr
   %tmp3 = call <4 x half> @llvm.roundeven.v4f16(<4 x half> %A)
   ret <4 x half> %tmp3
@@ -1248,35 +1234,7 @@ define <8 x half> @frintn_8h(<8 x half> %A) nounwind {
 ;
 ; CHECK-FP16-LABEL: frintn_8h:
 ; CHECK-FP16:       @ %bb.0:
-; CHECK-FP16-NEXT:    vmovx.f16 s4, s2
-; CHECK-FP16-NEXT:    vrintn.f16 s4, s4
-; CHECK-FP16-NEXT:    vmov r0, s4
-; CHECK-FP16-NEXT:    vrintn.f16 s4, s2
-; CHECK-FP16-NEXT:    vmov r1, s4
-; CHECK-FP16-NEXT:    vrintn.f16 s4, s3
-; CHECK-FP16-NEXT:    vmov.16 d17[0], r1
-; CHECK-FP16-NEXT:    vmov.16 d17[1], r0
-; CHECK-FP16-NEXT:    vmov r0, s4
-; CHECK-FP16-NEXT:    vmovx.f16 s4, s3
-; CHECK-FP16-NEXT:    vrintn.f16 s4, s4
-; CHECK-FP16-NEXT:    vmov.16 d17[2], r0
-; CHECK-FP16-NEXT:    vmov r0, s4
-; CHECK-FP16-NEXT:    vmovx.f16 s4, s0
-; CHECK-FP16-NEXT:    vrintn.f16 s4, s4
-; CHECK-FP16-NEXT:    vmov.16 d17[3], r0
-; CHECK-FP16-NEXT:    vmov r0, s4
-; CHECK-FP16-NEXT:    vrintn.f16 s4, s0
-; CHECK-FP16-NEXT:    vmovx.f16 s0, s1
-; CHECK-FP16-NEXT:    vmov r1, s4
-; CHECK-FP16-NEXT:    vrintn.f16 s4, s1
-; CHECK-FP16-NEXT:    vrintn.f16 s0, s0
-; CHECK-FP16-NEXT:    vmov.16 d16[0], r1
-; CHECK-FP16-NEXT:    vmov.16 d16[1], r0
-; CHECK-FP16-NEXT:    vmov r0, s4
-; CHECK-FP16-NEXT:    vmov.16 d16[2], r0
-; CHECK-FP16-NEXT:    vmov r0, s0
-; CHECK-FP16-NEXT:    vmov.16 d16[3], r0
-; CHECK-FP16-NEXT:    vorr q0, q8, q8
+; CHECK-FP16-NEXT:    vrintn.f16 q0, q0
 ; CHECK-FP16-NEXT:    bx lr
   %tmp3 = call <8 x half> @llvm.roundeven.v8f16(<8 x half> %A)
   ret <8 x half> %tmp3
@@ -1302,9 +1260,7 @@ define <2 x float> @frintn_2s(<2 x float> %A) nounwind {
 ;
 ; CHECK-LABEL: frintn_2s:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    vrintn.f32 s3, s1
-; CHECK-NEXT:    vrintn.f32 s2, s0
-; CHECK-NEXT:    vmov.f64 d0, d1
+; CHECK-NEXT:    vrintn.f32 d0, d0
 ; CHECK-NEXT:    bx lr
   %tmp3 = call <2 x float> @llvm.roundeven.v2f32(<2 x float> %A)
   ret <2 x float> %tmp3
@@ -1336,11 +1292,7 @@ define <4 x float> @frintn_4s(<4 x float> %A) nounwind {
 ;
 ; CHECK-LABEL: frintn_4s:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    vrintn.f32 s7, s3
-; CHECK-NEXT:    vrintn.f32 s6, s2
-; CHECK-NEXT:    vrintn.f32 s5, s1
-; CHECK-NEXT:    vrintn.f32 s4, s0
-; CHECK-NEXT:    vorr q0, q1, q1
+; CHECK-NEXT:    vrintn.f32 q0, q0
 ; CHECK-NEXT:    bx lr
   %tmp3 = call <4 x float> @llvm.roundeven.v4f32(<4 x float> %A)
   ret <4 x float> %tmp3

diff --git a/llvm/test/CodeGen/ARM/vrintn.ll b/llvm/test/CodeGen/ARM/vrintn.ll
index 40f806ba55d36..077007b22fc4b 100644
--- a/llvm/test/CodeGen/ARM/vrintn.ll
+++ b/llvm/test/CodeGen/ARM/vrintn.ll
@@ -1,12 +1,17 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc -mtriple=armv8 -mattr=+neon %s -o - | FileCheck %s
 
 declare float @llvm.arm.neon.vrintn.f32(float) nounwind readnone
 declare <2 x float> @llvm.arm.neon.vrintn.v2f32(<2 x float>) nounwind readnone
 declare <4 x float> @llvm.arm.neon.vrintn.v4f32(<4 x float>) nounwind readnone
 
-; CHECK-LABEL: vrintn_f32:
-; CHECK: vrintn.f32
 define float @vrintn_f32(ptr %A) nounwind {
+; CHECK-LABEL: vrintn_f32:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    vldr s0, [r0]
+; CHECK-NEXT:    vrintn.f32 s0, s0
+; CHECK-NEXT:    vmov r0, s0
+; CHECK-NEXT:    bx lr
   %tmp1 = load float, ptr %A
   %tmp2 = call float @llvm.arm.neon.vrintn.f32(float %tmp1)
   ret float %tmp2
@@ -74,10 +79,9 @@ define <4 x half> @roundeven_4h(<4 x half> %A) nounwind {
 define <2 x float> @roundeven_2s(<2 x float> %A) nounwind {
 ; CHECK-LABEL: roundeven_2s:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    vmov d0, r0, r1
-; CHECK-NEXT:    vrintn.f32 s3, s1
-; CHECK-NEXT:    vrintn.f32 s2, s0
-; CHECK-NEXT:    vmov r0, r1, d1
+; CHECK-NEXT:    vmov d16, r0, r1
+; CHECK-NEXT:    vrintn.f32 d16, d16
+; CHECK-NEXT:    vmov r0, r1, d16
 ; CHECK-NEXT:    bx lr
   %tmp3 = call <2 x float> @llvm.roundeven.v2f32(<2 x float> %A)
   ret <2 x float> %tmp3
@@ -86,14 +90,11 @@ define <2 x float> @roundeven_2s(<2 x float> %A) nounwind {
 define <4 x float> @roundeven_4s(<4 x float> %A) nounwind {
 ; CHECK-LABEL: roundeven_4s:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    vmov d1, r2, r3
-; CHECK-NEXT:    vmov d0, r0, r1
-; CHECK-NEXT:    vrintn.f32 s7, s3
-; CHECK-NEXT:    vrintn.f32 s6, s2
-; CHECK-NEXT:    vrintn.f32 s5, s1
-; CHECK-NEXT:    vrintn.f32 s4, s0
-; CHECK-NEXT:    vmov r2, r3, d3
-; CHECK-NEXT:    vmov r0, r1, d2
+; CHECK-NEXT:    vmov d17, r2, r3
+; CHECK-NEXT:    vmov d16, r0, r1
+; CHECK-NEXT:    vrintn.f32 q8, q8
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    vmov r2, r3, d17
 ; CHECK-NEXT:    bx lr
   %tmp3 = call <4 x float> @llvm.roundeven.v4f32(<4 x float> %A)
   ret <4 x float> %tmp3
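The practical effect of the upgrade path is easiest to see on a small IR example. The sketch below is illustrative only and is not taken from the committed tests (the function name is made up): a call to the removed llvm.arm.neon.vrintn intrinsic is rewritten by the new AutoUpgrade entry to the generic llvm.roundeven intrinsic, and because ISD::FROUNDEVEN is now Legal for the Neon vector types, the vector forms should select to a single vrintn instruction rather than per-lane scalar rounds.

; Illustrative sketch only; assumes an armv8 target with +neon.
; Old bitcode that still calls the ARM-specific intrinsic:
define <4 x float> @old_roundeven(<4 x float> %A) {
  %r = call <4 x float> @llvm.arm.neon.vrintn.v4f32(<4 x float> %A)
  ret <4 x float> %r
}
declare <4 x float> @llvm.arm.neon.vrintn.v4f32(<4 x float>)

; When the module is parsed, AutoUpgrade rewrites the call to the generic form:
;   %r = call <4 x float> @llvm.roundeven.v4f32(<4 x float> %A)
; and with FROUNDEVEN legal for v4f32, llc -mtriple=armv8 -mattr=+neon now
; selects a single vector round-to-nearest-even, e.g.
;   vrintn.f32 q8, q8
; instead of four scalar vrintn.f32 instructions.

This mirrors how the other directed-rounding intrinsics in the same upgrade table (vrinta, vrintm, vrintp, vrintx) were already mapped to generic intrinsics, which is why only the vrintn entry needed to be added.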