Author: Ulrich Weigand
Date: 2025-02-14T00:10:37+01:00
New Revision: adacbf68ebeaeaf9d07a5beaff163da9ca0534a9
URL: https://github.com/llvm/llvm-project/commit/adacbf68ebeaeaf9d07a5beaff163da9ca0534a9
DIFF: https://github.com/llvm/llvm-project/commit/adacbf68ebeaeaf9d07a5beaff163da9ca0534a9.diff

LOG: [SystemZ] Add codegen support for llvm.roundeven

This is straightforward, as we already had all the necessary
instructions; they simply were not wired up. It also allows
implementing the vec_round intrinsic via the standard llvm.roundeven
IR instead of a platform-specific intrinsic.

Added:

Modified:
    clang/lib/CodeGen/CGBuiltin.cpp
    clang/test/CodeGen/SystemZ/builtins-systemz-vector-constrained.c
    clang/test/CodeGen/SystemZ/builtins-systemz-vector.c
    clang/test/CodeGen/SystemZ/builtins-systemz-vector2-constrained.c
    clang/test/CodeGen/SystemZ/builtins-systemz-vector2.c
    clang/test/CodeGen/SystemZ/builtins-systemz-zvector-constrained.c
    clang/test/CodeGen/SystemZ/builtins-systemz-zvector.c
    clang/test/CodeGen/SystemZ/builtins-systemz-zvector2-constrained.c
    clang/test/CodeGen/SystemZ/builtins-systemz-zvector2.c
    llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
    llvm/lib/Target/SystemZ/SystemZInstrFP.td
    llvm/lib/Target/SystemZ/SystemZInstrVector.td
    llvm/test/CodeGen/SystemZ/fp-round-01.ll
    llvm/test/CodeGen/SystemZ/fp-round-02.ll
    llvm/test/CodeGen/SystemZ/fp-round-03.ll
    llvm/test/CodeGen/SystemZ/fp-strict-round-01.ll
    llvm/test/CodeGen/SystemZ/fp-strict-round-02.ll
    llvm/test/CodeGen/SystemZ/fp-strict-round-03.ll
    llvm/test/CodeGen/SystemZ/vec-round-01.ll
    llvm/test/CodeGen/SystemZ/vec-round-02.ll
    llvm/test/CodeGen/SystemZ/vec-strict-round-01.ll
    llvm/test/CodeGen/SystemZ/vec-strict-round-02.ll
    llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll

Removed:


################################################################################

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 361e4c4bf2e2e..1f1983620fdee 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -21011,6 +21011,8 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID, CI = Intrinsic::experimental_constrained_nearbyint; break; case 1: ID = Intrinsic::round; CI = Intrinsic::experimental_constrained_round; break; + case 4: ID = Intrinsic::roundeven; + CI = Intrinsic::experimental_constrained_roundeven; break; case 5: ID = Intrinsic::trunc; CI = Intrinsic::experimental_constrained_trunc; break; case 6: ID = Intrinsic::ceil; diff --git a/clang/test/CodeGen/SystemZ/builtins-systemz-vector-constrained.c b/clang/test/CodeGen/SystemZ/builtins-systemz-vector-constrained.c index 6d2845504a39f..ff24ef9a091b7 100644 --- a/clang/test/CodeGen/SystemZ/builtins-systemz-vector-constrained.c +++ b/clang/test/CodeGen/SystemZ/builtins-systemz-vector-constrained.c @@ -44,12 +44,14 @@ void test_float(void) { // CHECK: call <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(<2 x double> %{{.*}}) vd = __builtin_s390_vfidb(vd, 4, 1); // CHECK: call <2 x double> @llvm.experimental.constrained.round.v2f64(<2 x double> %{{.*}}) + vd = __builtin_s390_vfidb(vd, 4, 4); + // CHECK: call <2 x double> @llvm.experimental.constrained.roundeven.v2f64(<2 x double> %{{.*}}) vd = __builtin_s390_vfidb(vd, 4, 5); // CHECK: call <2 x double> @llvm.experimental.constrained.trunc.v2f64(<2 x double> %{{.*}}) vd = __builtin_s390_vfidb(vd, 4, 6); // CHECK: call <2 x double> @llvm.experimental.constrained.ceil.v2f64(<2 x double> %{{.*}}) vd = __builtin_s390_vfidb(vd, 4, 7); // CHECK: call <2 x double> @llvm.experimental.constrained.floor.v2f64(<2 x double> %{{.*}}) - vd = __builtin_s390_vfidb(vd, 4, 4); - //
CHECK: call <2 x double> @llvm.s390.vfidb(<2 x double> %{{.*}}, i32 4, i32 4) + vd = __builtin_s390_vfidb(vd, 4, 3); + // CHECK: call <2 x double> @llvm.s390.vfidb(<2 x double> %{{.*}}, i32 4, i32 3) } diff --git a/clang/test/CodeGen/SystemZ/builtins-systemz-vector.c b/clang/test/CodeGen/SystemZ/builtins-systemz-vector.c index 58e452ae05c40..ae3b08a1b67eb 100644 --- a/clang/test/CodeGen/SystemZ/builtins-systemz-vector.c +++ b/clang/test/CodeGen/SystemZ/builtins-systemz-vector.c @@ -593,12 +593,14 @@ void test_float(void) { // CHECK: call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %{{.*}}) vd = __builtin_s390_vfidb(vd, 4, 1); // CHECK: call <2 x double> @llvm.round.v2f64(<2 x double> %{{.*}}) + vd = __builtin_s390_vfidb(vd, 4, 4); + // CHECK: call <2 x double> @llvm.roundeven.v2f64(<2 x double> %{{.*}}) vd = __builtin_s390_vfidb(vd, 4, 5); // CHECK: call <2 x double> @llvm.trunc.v2f64(<2 x double> %{{.*}}) vd = __builtin_s390_vfidb(vd, 4, 6); // CHECK: call <2 x double> @llvm.ceil.v2f64(<2 x double> %{{.*}}) vd = __builtin_s390_vfidb(vd, 4, 7); // CHECK: call <2 x double> @llvm.floor.v2f64(<2 x double> %{{.*}}) - vd = __builtin_s390_vfidb(vd, 4, 4); - // CHECK: call <2 x double> @llvm.s390.vfidb(<2 x double> %{{.*}}, i32 4, i32 4) + vd = __builtin_s390_vfidb(vd, 4, 3); + // CHECK: call <2 x double> @llvm.s390.vfidb(<2 x double> %{{.*}}, i32 4, i32 3) } diff --git a/clang/test/CodeGen/SystemZ/builtins-systemz-vector2-constrained.c b/clang/test/CodeGen/SystemZ/builtins-systemz-vector2-constrained.c index 735b6a0249ab6..12c675041af76 100644 --- a/clang/test/CodeGen/SystemZ/builtins-systemz-vector2-constrained.c +++ b/clang/test/CodeGen/SystemZ/builtins-systemz-vector2-constrained.c @@ -59,11 +59,15 @@ void test_float(void) { // CHECK: call <4 x float> @llvm.experimental.constrained.nearbyint.v4f32(<4 x float> %{{.*}}, metadata !{{.*}}) vf = __builtin_s390_vfisb(vf, 4, 1); // CHECK: call <4 x float> @llvm.experimental.constrained.round.v4f32(<4 x float> %{{.*}}, metadata !{{.*}}) + vf = __builtin_s390_vfisb(vf, 4, 4); + // CHECK: call <4 x float> @llvm.experimental.constrained.roundeven.v4f32(<4 x float> %{{.*}}, metadata !{{.*}}) vf = __builtin_s390_vfisb(vf, 4, 5); // CHECK: call <4 x float> @llvm.experimental.constrained.trunc.v4f32(<4 x float> %{{.*}}, metadata !{{.*}}) vf = __builtin_s390_vfisb(vf, 4, 6); // CHECK: call <4 x float> @llvm.experimental.constrained.ceil.v4f32(<4 x float> %{{.*}}, metadata !{{.*}}) vf = __builtin_s390_vfisb(vf, 4, 7); // CHECK: call <4 x float> @llvm.experimental.constrained.floor.v4f32(<4 x float> %{{.*}}, metadata !{{.*}}) + vf = __builtin_s390_vfisb(vf, 4, 3); + // CHECK: call <4 x float> @llvm.s390.vfisb(<4 x float> %{{.*}}, i32 4, i32 3) } diff --git a/clang/test/CodeGen/SystemZ/builtins-systemz-vector2.c b/clang/test/CodeGen/SystemZ/builtins-systemz-vector2.c index 3761f252d724b..8f9e164ebef8c 100644 --- a/clang/test/CodeGen/SystemZ/builtins-systemz-vector2.c +++ b/clang/test/CodeGen/SystemZ/builtins-systemz-vector2.c @@ -125,13 +125,15 @@ void test_float(void) { // CHECK: call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %{{.*}}) vf = __builtin_s390_vfisb(vf, 4, 1); // CHECK: call <4 x float> @llvm.round.v4f32(<4 x float> %{{.*}}) + vf = __builtin_s390_vfisb(vf, 4, 4); + // CHECK: call <4 x float> @llvm.roundeven.v4f32(<4 x float> %{{.*}}) vf = __builtin_s390_vfisb(vf, 4, 5); // CHECK: call <4 x float> @llvm.trunc.v4f32(<4 x float> %{{.*}}) vf = __builtin_s390_vfisb(vf, 4, 6); // CHECK: call <4 x float> @llvm.ceil.v4f32(<4 x float> %{{.*}}) vf = 
__builtin_s390_vfisb(vf, 4, 7); // CHECK: call <4 x float> @llvm.floor.v4f32(<4 x float> %{{.*}}) - vf = __builtin_s390_vfisb(vf, 4, 4); - // CHECK: call <4 x float> @llvm.s390.vfisb(<4 x float> %{{.*}}, i32 4, i32 4) + vf = __builtin_s390_vfisb(vf, 4, 3); + // CHECK: call <4 x float> @llvm.s390.vfisb(<4 x float> %{{.*}}, i32 4, i32 3) } diff --git a/clang/test/CodeGen/SystemZ/builtins-systemz-zvector-constrained.c b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector-constrained.c index 6a1f8f0e923f6..4993df20df143 100644 --- a/clang/test/CodeGen/SystemZ/builtins-systemz-zvector-constrained.c +++ b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector-constrained.c @@ -315,4 +315,6 @@ void test_float(void) { // CHECK: call <2 x double> @llvm.experimental.constrained.rint.v2f64(<2 x double> %{{.*}}, metadata !{{.*}}) // CHECK-ASM: vfidb %{{.*}}, %{{.*}}, 0, 0 vd = vec_round(vd); + // CHECK: call <2 x double> @llvm.experimental.constrained.roundeven.v2f64(<2 x double> %{{.*}}, metadata !{{.*}}) + // CHECK-ASM: vfidb %{{.*}}, %{{.*}}, 4, 4 } diff --git a/clang/test/CodeGen/SystemZ/builtins-systemz-zvector.c b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector.c index 775733ad3b948..d5d15b4dea966 100644 --- a/clang/test/CodeGen/SystemZ/builtins-systemz-zvector.c +++ b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector.c @@ -4889,7 +4889,7 @@ void test_float(void) { // CHECK: call <2 x double> @llvm.rint.v2f64(<2 x double> %{{.*}}) // CHECK-ASM: vfidb %{{.*}}, %{{.*}}, 0, 0 vd = vec_round(vd); - // CHECK: call <2 x double> @llvm.s390.vfidb(<2 x double> %{{.*}}, i32 4, i32 4) + // CHECK: call <2 x double> @llvm.roundeven.v2f64(<2 x double> %{{.*}}) // CHECK-ASM: vfidb %{{.*}}, %{{.*}}, 4, 4 vbl = vec_fp_test_data_class(vd, 0, &cc); diff --git a/clang/test/CodeGen/SystemZ/builtins-systemz-zvector2-constrained.c b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector2-constrained.c index 750f5011a2679..25b3e0b68cd02 100644 --- a/clang/test/CodeGen/SystemZ/builtins-systemz-zvector2-constrained.c +++ b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector2-constrained.c @@ -522,10 +522,10 @@ void test_float(void) { // CHECK-ASM: vfidb %{{.*}}, %{{.*}}, 0, 0 vf = vec_round(vf); - // CHECK: call <4 x float> @llvm.s390.vfisb(<4 x float> %{{.*}}, i32 4, i32 4) + // CHECK: call <4 x float> @llvm.experimental.constrained.roundeven.v4f32(<4 x float> %{{.*}}, metadata !{{.*}}) // CHECK-ASM: vfisb %{{.*}}, %{{.*}}, 4, 4 vd = vec_round(vd); - // CHECK: call <2 x double> @llvm.s390.vfidb(<2 x double> %{{.*}}, i32 4, i32 4) + // CHECK: call <2 x double> @llvm.experimental.constrained.roundeven.v2f64(<2 x double> %{{.*}}, metadata !{{.*}}) // CHECK-ASM: vfidb %{{.*}}, %{{.*}}, 4, 4 vbi = vec_fp_test_data_class(vf, 0, &cc); diff --git a/clang/test/CodeGen/SystemZ/builtins-systemz-zvector2.c b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector2.c index 60df95817a329..c1ef178fcfaa9 100644 --- a/clang/test/CodeGen/SystemZ/builtins-systemz-zvector2.c +++ b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector2.c @@ -908,10 +908,10 @@ void test_float(void) { // CHECK-ASM: vfidb %{{.*}}, %{{.*}}, 0, 0 vf = vec_round(vf); - // CHECK: call <4 x float> @llvm.s390.vfisb(<4 x float> %{{.*}}, i32 4, i32 4) + // CHECK: call <4 x float> @llvm.roundeven.v4f32(<4 x float> %{{.*}}) // CHECK-ASM: vfisb %{{.*}}, %{{.*}}, 4, 4 vd = vec_round(vd); - // CHECK: call <2 x double> @llvm.s390.vfidb(<2 x double> %{{.*}}, i32 4, i32 4) + // CHECK: call <2 x double> @llvm.roundeven.v2f64(<2 x double> %{{.*}}) // CHECK-ASM: vfidb %{{.*}}, 
%{{.*}}, 4, 4 vbi = vec_fp_test_data_class(vf, 0, &cc); diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index 9ffd4190b34bb..bb584b7bb5c9a 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -556,6 +556,7 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FCEIL, VT, Legal); setOperationAction(ISD::FTRUNC, VT, Legal); setOperationAction(ISD::FROUND, VT, Legal); + setOperationAction(ISD::FROUNDEVEN, VT, Legal); } // No special instructions for these. @@ -582,8 +583,9 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal); setOperationAction(ISD::STRICT_FFLOOR, VT, Legal); setOperationAction(ISD::STRICT_FCEIL, VT, Legal); - setOperationAction(ISD::STRICT_FROUND, VT, Legal); setOperationAction(ISD::STRICT_FTRUNC, VT, Legal); + setOperationAction(ISD::STRICT_FROUND, VT, Legal); + setOperationAction(ISD::STRICT_FROUNDEVEN, VT, Legal); } } } @@ -616,6 +618,7 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FCEIL, MVT::v2f64, Legal); setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal); setOperationAction(ISD::FROUND, MVT::v2f64, Legal); + setOperationAction(ISD::FROUNDEVEN, MVT::v2f64, Legal); // Handle constrained floating-point operations. setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal); @@ -630,6 +633,7 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, setOperationAction(ISD::STRICT_FCEIL, MVT::v2f64, Legal); setOperationAction(ISD::STRICT_FTRUNC, MVT::v2f64, Legal); setOperationAction(ISD::STRICT_FROUND, MVT::v2f64, Legal); + setOperationAction(ISD::STRICT_FROUNDEVEN, MVT::v2f64, Legal); setOperationAction(ISD::SETCC, MVT::v2f64, Custom); setOperationAction(ISD::SETCC, MVT::v4f32, Custom); @@ -657,6 +661,7 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FCEIL, MVT::v4f32, Legal); setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal); setOperationAction(ISD::FROUND, MVT::v4f32, Legal); + setOperationAction(ISD::FROUNDEVEN, MVT::v4f32, Legal); setOperationAction(ISD::FMAXNUM, MVT::f64, Legal); setOperationAction(ISD::FMAXIMUM, MVT::f64, Legal); @@ -694,8 +699,9 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v4f32, Legal); setOperationAction(ISD::STRICT_FFLOOR, MVT::v4f32, Legal); setOperationAction(ISD::STRICT_FCEIL, MVT::v4f32, Legal); - setOperationAction(ISD::STRICT_FROUND, MVT::v4f32, Legal); setOperationAction(ISD::STRICT_FTRUNC, MVT::v4f32, Legal); + setOperationAction(ISD::STRICT_FROUND, MVT::v4f32, Legal); + setOperationAction(ISD::STRICT_FROUNDEVEN, MVT::v4f32, Legal); for (auto VT : { MVT::f32, MVT::f64, MVT::f128, MVT::v4f32, MVT::v2f64 }) { setOperationAction(ISD::STRICT_FMAXNUM, VT, Legal); diff --git a/llvm/lib/Target/SystemZ/SystemZInstrFP.td b/llvm/lib/Target/SystemZ/SystemZInstrFP.td index aad04a2b4159c..c171982b45692 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrFP.td +++ b/llvm/lib/Target/SystemZ/SystemZInstrFP.td @@ -411,6 +411,12 @@ let Predicates = [FeatureFPExtension] in { def : Pat<(any_ftrunc FP64:$src), (FIDBRA 5, FP64:$src, 4)>; def : Pat<(any_ftrunc FP128:$src), (FIXBRA 5, FP128:$src, 4)>; + // Same idea for roundeven, where mode 4 is round towards nearest + // with ties to even. 
+ def : Pat<(any_froundeven FP32:$src), (FIEBRA 4, FP32:$src, 4)>; + def : Pat<(any_froundeven FP64:$src), (FIDBRA 4, FP64:$src, 4)>; + def : Pat<(any_froundeven FP128:$src), (FIXBRA 4, FP128:$src, 4)>; + // Same idea for round, where mode 1 is round towards nearest with // ties away from zero. def : Pat<(any_fround FP32:$src), (FIEBRA 1, FP32:$src, 4)>; diff --git a/llvm/lib/Target/SystemZ/SystemZInstrVector.td b/llvm/lib/Target/SystemZ/SystemZInstrVector.td index edd20a5de8c63..03588906159d7 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrVector.td +++ b/llvm/lib/Target/SystemZ/SystemZInstrVector.td @@ -1397,6 +1397,7 @@ multiclass VectorRounding<Instruction insn, TypedReg tr> { def : FPConversion<insn, any_ffloor, tr, tr, 4, 7>; def : FPConversion<insn, any_fceil, tr, tr, 4, 6>; def : FPConversion<insn, any_ftrunc, tr, tr, 4, 5>; + def : FPConversion<insn, any_froundeven, tr, tr, 4, 4>; def : FPConversion<insn, any_fround, tr, tr, 4, 1>; } diff --git a/llvm/test/CodeGen/SystemZ/fp-round-01.ll b/llvm/test/CodeGen/SystemZ/fp-round-01.ll index 6e35cd0ff0cab..b1db2f547a832 100644 --- a/llvm/test/CodeGen/SystemZ/fp-round-01.ll +++ b/llvm/test/CodeGen/SystemZ/fp-round-01.ll @@ -193,3 +193,35 @@ define void @f18(ptr %ptr) { store fp128 %res, ptr %ptr ret void } + +; Test roundeven for f32. +declare float @llvm.roundeven.f32(float %f) +define float @f19(float %f) { +; CHECK-LABEL: f19: +; CHECK: brasl %r14, roundevenf@PLT +; CHECK: br %r14 + %res = call float @llvm.roundeven.f32(float %f) + ret float %res +} + +; Test roundeven for f64. +declare double @llvm.roundeven.f64(double %f) +define double @f20(double %f) { +; CHECK-LABEL: f20: +; CHECK: brasl %r14, roundeven@PLT +; CHECK: br %r14 + %res = call double @llvm.roundeven.f64(double %f) + ret double %res +} + +; Test roundeven for f128. +declare fp128 @llvm.roundeven.f128(fp128 %f) +define void @f21(ptr %ptr) { +; CHECK-LABEL: f21: +; CHECK: brasl %r14, roundevenl@PLT +; CHECK: br %r14 + %src = load fp128, ptr %ptr + %res = call fp128 @llvm.roundeven.f128(fp128 %src) + store fp128 %res, ptr %ptr + ret void +} diff --git a/llvm/test/CodeGen/SystemZ/fp-round-02.ll b/llvm/test/CodeGen/SystemZ/fp-round-02.ll index 2995b2a21fd5b..2cf009ad5b856 100644 --- a/llvm/test/CodeGen/SystemZ/fp-round-02.ll +++ b/llvm/test/CodeGen/SystemZ/fp-round-02.ll @@ -197,3 +197,35 @@ define void @f18(ptr %ptr) { store fp128 %res, ptr %ptr ret void } + +; Test roundeven for f32. +declare float @llvm.roundeven.f32(float %f) +define float @f19(float %f) { +; CHECK-LABEL: f19: +; CHECK: fiebra %f0, 4, %f0, 4 +; CHECK: br %r14 + %res = call float @llvm.roundeven.f32(float %f) + ret float %res +} + +; Test roundeven for f64. +declare double @llvm.roundeven.f64(double %f) +define double @f20(double %f) { +; CHECK-LABEL: f20: +; CHECK: fidbra %f0, 4, %f0, 4 +; CHECK: br %r14 + %res = call double @llvm.roundeven.f64(double %f) + ret double %res +} + +; Test roundeven for f128. 
+declare fp128 @llvm.roundeven.f128(fp128 %f) +define void @f21(ptr %ptr) { +; CHECK-LABEL: f21: +; CHECK: fixbra %f0, 4, %f0, 4 +; CHECK: br %r14 + %src = load fp128, ptr %ptr + %res = call fp128 @llvm.roundeven.f128(fp128 %src) + store fp128 %res, ptr %ptr + ret void +} diff --git a/llvm/test/CodeGen/SystemZ/fp-round-03.ll b/llvm/test/CodeGen/SystemZ/fp-round-03.ll index d35cafc406ad7..3cae74749efbe 100644 --- a/llvm/test/CodeGen/SystemZ/fp-round-03.ll +++ b/llvm/test/CodeGen/SystemZ/fp-round-03.ll @@ -205,3 +205,37 @@ define void @f18(ptr %ptr) { store fp128 %res, ptr %ptr ret void } + +; Test roundeven for f32. +declare float @llvm.roundeven.f32(float %f) +define float @f19(float %f) { +; CHECK-LABEL: f19: +; CHECK: fiebra %f0, 4, %f0, 4 +; CHECK: br %r14 + %res = call float @llvm.roundeven.f32(float %f) + ret float %res +} + +; Test roundeven for f64. +declare double @llvm.roundeven.f64(double %f) +define double @f20(double %f) { +; CHECK-LABEL: f20: +; CHECK: fidbra %f0, 4, %f0, 4 +; CHECK: br %r14 + %res = call double @llvm.roundeven.f64(double %f) + ret double %res +} + +; Test roundeven for f128. +declare fp128 @llvm.roundeven.f128(fp128 %f) +define void @f21(ptr %ptr) { +; CHECK-LABEL: f21: +; CHECK: vl [[REG:%v[0-9]+]], 0(%r2) +; CHECK: wfixb [[RES:%v[0-9]+]], [[REG]], 4, 4 +; CHECK: vst [[RES]], 0(%r2) +; CHECK: br %r14 + %src = load fp128, ptr %ptr + %res = call fp128 @llvm.roundeven.f128(fp128 %src) + store fp128 %res, ptr %ptr + ret void +} diff --git a/llvm/test/CodeGen/SystemZ/fp-strict-round-01.ll b/llvm/test/CodeGen/SystemZ/fp-strict-round-01.ll index 1fbb1790c01dc..964f16d605db6 100644 --- a/llvm/test/CodeGen/SystemZ/fp-strict-round-01.ll +++ b/llvm/test/CodeGen/SystemZ/fp-strict-round-01.ll @@ -236,4 +236,42 @@ define void @f18(ptr %ptr) #0 { ret void } +; Test roundeven for f32. +declare float @llvm.experimental.constrained.roundeven.f32(float, metadata) +define float @f19(float %f) #0 { +; CHECK-LABEL: f19: +; CHECK: brasl %r14, roundevenf@PLT +; CHECK: br %r14 + %res = call float @llvm.experimental.constrained.roundeven.f32( + float %f, + metadata !"fpexcept.strict") #0 + ret float %res +} + +; Test roundeven for f64. +declare double @llvm.experimental.constrained.roundeven.f64(double, metadata) +define double @f20(double %f) #0 { +; CHECK-LABEL: f20: +; CHECK: brasl %r14, roundeven@PLT +; CHECK: br %r14 + %res = call double @llvm.experimental.constrained.roundeven.f64( + double %f, + metadata !"fpexcept.strict") #0 + ret double %res +} + +; Test roundeven for f128. +declare fp128 @llvm.experimental.constrained.roundeven.f128(fp128, metadata) +define void @f21(ptr %ptr) #0 { +; CHECK-LABEL: f21: +; CHECK: brasl %r14, roundevenl@PLT +; CHECK: br %r14 + %src = load fp128, ptr %ptr + %res = call fp128 @llvm.experimental.constrained.roundeven.f128( + fp128 %src, + metadata !"fpexcept.strict") #0 + store fp128 %res, ptr %ptr + ret void +} + attributes #0 = { strictfp } diff --git a/llvm/test/CodeGen/SystemZ/fp-strict-round-02.ll b/llvm/test/CodeGen/SystemZ/fp-strict-round-02.ll index bc304a3fb95fb..c7b721e3770e5 100644 --- a/llvm/test/CodeGen/SystemZ/fp-strict-round-02.ll +++ b/llvm/test/CodeGen/SystemZ/fp-strict-round-02.ll @@ -240,4 +240,42 @@ define void @f18(ptr %ptr) #0 { ret void } +; Test roundeven for f32. 
+declare float @llvm.experimental.constrained.roundeven.f32(float, metadata) +define float @f19(float %f) #0 { +; CHECK-LABEL: f19: +; CHECK: fiebra %f0, 4, %f0, 4 +; CHECK: br %r14 + %res = call float @llvm.experimental.constrained.roundeven.f32( + float %f, + metadata !"fpexcept.strict") #0 + ret float %res +} + +; Test roundeven for f64. +declare double @llvm.experimental.constrained.roundeven.f64(double, metadata) +define double @f20(double %f) #0 { +; CHECK-LABEL: f20: +; CHECK: fidbra %f0, 4, %f0, 4 +; CHECK: br %r14 + %res = call double @llvm.experimental.constrained.roundeven.f64( + double %f, + metadata !"fpexcept.strict") #0 + ret double %res +} + +; Test roundeven for f128. +declare fp128 @llvm.experimental.constrained.roundeven.f128(fp128, metadata) +define void @f21(ptr %ptr) #0 { +; CHECK-LABEL: f21: +; CHECK: fixbra %f0, 4, %f0, 4 +; CHECK: br %r14 + %src = load fp128, ptr %ptr + %res = call fp128 @llvm.experimental.constrained.roundeven.f128( + fp128 %src, + metadata !"fpexcept.strict") #0 + store fp128 %res, ptr %ptr + ret void +} + attributes #0 = { strictfp } diff --git a/llvm/test/CodeGen/SystemZ/fp-strict-round-03.ll b/llvm/test/CodeGen/SystemZ/fp-strict-round-03.ll index 2cdff7d5c425e..e99d8b0f01650 100644 --- a/llvm/test/CodeGen/SystemZ/fp-strict-round-03.ll +++ b/llvm/test/CodeGen/SystemZ/fp-strict-round-03.ll @@ -248,4 +248,44 @@ define void @f18(ptr %ptr) #0 { ret void } +; Test roundeven for f32. +declare float @llvm.experimental.constrained.roundeven.f32(float, metadata) +define float @f19(float %f) #0 { +; CHECK-LABEL: f19: +; CHECK: fiebra %f0, 4, %f0, 4 +; CHECK: br %r14 + %res = call float @llvm.experimental.constrained.roundeven.f32( + float %f, + metadata !"fpexcept.strict") #0 + ret float %res +} + +; Test roundeven for f64. +declare double @llvm.experimental.constrained.roundeven.f64(double, metadata) +define double @f20(double %f) #0 { +; CHECK-LABEL: f20: +; CHECK: fidbra %f0, 4, %f0, 4 +; CHECK: br %r14 + %res = call double @llvm.experimental.constrained.roundeven.f64( + double %f, + metadata !"fpexcept.strict") #0 + ret double %res +} + +; Test roundeven for f128. 
+declare fp128 @llvm.experimental.constrained.roundeven.f128(fp128, metadata) +define void @f21(ptr %ptr) #0 { +; CHECK-LABEL: f21: +; CHECK: vl [[REG:%v[0-9]+]], 0(%r2) +; CHECK: wfixb [[RES:%v[0-9]+]], [[REG]], 4, 4 +; CHECK: vst [[RES]], 0(%r2) +; CHECK: br %r14 + %src = load fp128, ptr %ptr + %res = call fp128 @llvm.experimental.constrained.roundeven.f128( + fp128 %src, + metadata !"fpexcept.strict") #0 + store fp128 %res, ptr %ptr + ret void +} + attributes #0 = { strictfp } diff --git a/llvm/test/CodeGen/SystemZ/vec-round-01.ll b/llvm/test/CodeGen/SystemZ/vec-round-01.ll index 82718276bb08e..41676e10679bd 100644 --- a/llvm/test/CodeGen/SystemZ/vec-round-01.ll +++ b/llvm/test/CodeGen/SystemZ/vec-round-01.ll @@ -8,12 +8,14 @@ declare double @llvm.floor.f64(double) declare double @llvm.ceil.f64(double) declare double @llvm.trunc.f64(double) declare double @llvm.round.f64(double) +declare double @llvm.roundeven.f64(double) declare <2 x double> @llvm.rint.v2f64(<2 x double>) declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>) declare <2 x double> @llvm.floor.v2f64(<2 x double>) declare <2 x double> @llvm.ceil.v2f64(<2 x double>) declare <2 x double> @llvm.trunc.v2f64(<2 x double>) declare <2 x double> @llvm.round.v2f64(<2 x double>) +declare <2 x double> @llvm.roundeven.v2f64(<2 x double>) define <2 x double> @f1(<2 x double> %val) { ; CHECK-LABEL: f1: @@ -63,8 +65,16 @@ define <2 x double> @f6(<2 x double> %val) { ret <2 x double> %res } -define double @f7(<2 x double> %val) { +define <2 x double> @f7(<2 x double> %val) { ; CHECK-LABEL: f7: +; CHECK: vfidb %v24, %v24, 4, 4 +; CHECK: br %r14 + %res = call <2 x double> @llvm.roundeven.v2f64(<2 x double> %val) + ret <2 x double> %res +} + +define double @f8(<2 x double> %val) { +; CHECK-LABEL: f8: ; CHECK: wfidb %f0, %v24, 0, 0 ; CHECK: br %r14 %scalar = extractelement <2 x double> %val, i32 0 @@ -72,8 +82,8 @@ define double @f7(<2 x double> %val) { ret double %res } -define double @f8(<2 x double> %val) { -; CHECK-LABEL: f8: +define double @f9(<2 x double> %val) { +; CHECK-LABEL: f9: ; CHECK: wfidb %f0, %v24, 4, 0 ; CHECK: br %r14 %scalar = extractelement <2 x double> %val, i32 0 @@ -81,8 +91,8 @@ define double @f8(<2 x double> %val) { ret double %res } -define double @f9(<2 x double> %val) { -; CHECK-LABEL: f9: +define double @f10(<2 x double> %val) { +; CHECK-LABEL: f10: ; CHECK: wfidb %f0, %v24, 4, 7 ; CHECK: br %r14 %scalar = extractelement <2 x double> %val, i32 0 @@ -90,8 +100,8 @@ define double @f9(<2 x double> %val) { ret double %res } -define double @f10(<2 x double> %val) { -; CHECK-LABEL: f10: +define double @f11(<2 x double> %val) { +; CHECK-LABEL: f11: ; CHECK: wfidb %f0, %v24, 4, 6 ; CHECK: br %r14 %scalar = extractelement <2 x double> %val, i32 0 @@ -99,8 +109,8 @@ define double @f10(<2 x double> %val) { ret double %res } -define double @f11(<2 x double> %val) { -; CHECK-LABEL: f11: +define double @f12(<2 x double> %val) { +; CHECK-LABEL: f12: ; CHECK: wfidb %f0, %v24, 4, 5 ; CHECK: br %r14 %scalar = extractelement <2 x double> %val, i32 0 @@ -108,11 +118,20 @@ define double @f11(<2 x double> %val) { ret double %res } -define double @f12(<2 x double> %val) { -; CHECK-LABEL: f12: +define double @f13(<2 x double> %val) { +; CHECK-LABEL: f13: ; CHECK: wfidb %f0, %v24, 4, 1 ; CHECK: br %r14 %scalar = extractelement <2 x double> %val, i32 0 %res = call double @llvm.round.f64(double %scalar) ret double %res } + +define double @f14(<2 x double> %val) { +; CHECK-LABEL: f14: +; CHECK: wfidb %f0, %v24, 4, 4 +; CHECK: br %r14 + 
%scalar = extractelement <2 x double> %val, i32 0 + %res = call double @llvm.roundeven.f64(double %scalar) + ret double %res +} diff --git a/llvm/test/CodeGen/SystemZ/vec-round-02.ll b/llvm/test/CodeGen/SystemZ/vec-round-02.ll index bcd66ea803d1b..46eddb4da38e8 100644 --- a/llvm/test/CodeGen/SystemZ/vec-round-02.ll +++ b/llvm/test/CodeGen/SystemZ/vec-round-02.ll @@ -8,12 +8,14 @@ declare float @llvm.floor.f32(float) declare float @llvm.ceil.f32(float) declare float @llvm.trunc.f32(float) declare float @llvm.round.f32(float) +declare float @llvm.roundeven.f32(float) declare <4 x float> @llvm.rint.v4f32(<4 x float>) declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>) declare <4 x float> @llvm.floor.v4f32(<4 x float>) declare <4 x float> @llvm.ceil.v4f32(<4 x float>) declare <4 x float> @llvm.trunc.v4f32(<4 x float>) declare <4 x float> @llvm.round.v4f32(<4 x float>) +declare <4 x float> @llvm.roundeven.v4f32(<4 x float>) define <4 x float> @f1(<4 x float> %val) { ; CHECK-LABEL: f1: @@ -63,8 +65,16 @@ define <4 x float> @f6(<4 x float> %val) { ret <4 x float> %res } -define float @f7(<4 x float> %val) { +define <4 x float> @f7(<4 x float> %val) { ; CHECK-LABEL: f7: +; CHECK: vfisb %v24, %v24, 4, 4 +; CHECK: br %r14 + %res = call <4 x float> @llvm.roundeven.v4f32(<4 x float> %val) + ret <4 x float> %res +} + +define float @f8(<4 x float> %val) { +; CHECK-LABEL: f8: ; CHECK: wfisb %f0, %v24, 0, 0 ; CHECK: br %r14 %scalar = extractelement <4 x float> %val, i32 0 @@ -72,8 +82,8 @@ define float @f7(<4 x float> %val) { ret float %res } -define float @f8(<4 x float> %val) { -; CHECK-LABEL: f8: +define float @f9(<4 x float> %val) { +; CHECK-LABEL: f9: ; CHECK: wfisb %f0, %v24, 4, 0 ; CHECK: br %r14 %scalar = extractelement <4 x float> %val, i32 0 @@ -81,8 +91,8 @@ define float @f8(<4 x float> %val) { ret float %res } -define float @f9(<4 x float> %val) { -; CHECK-LABEL: f9: +define float @f10(<4 x float> %val) { +; CHECK-LABEL: f10: ; CHECK: wfisb %f0, %v24, 4, 7 ; CHECK: br %r14 %scalar = extractelement <4 x float> %val, i32 0 @@ -90,8 +100,8 @@ define float @f9(<4 x float> %val) { ret float %res } -define float @f10(<4 x float> %val) { -; CHECK-LABEL: f10: +define float @f11(<4 x float> %val) { +; CHECK-LABEL: f11: ; CHECK: wfisb %f0, %v24, 4, 6 ; CHECK: br %r14 %scalar = extractelement <4 x float> %val, i32 0 @@ -99,8 +109,8 @@ define float @f10(<4 x float> %val) { ret float %res } -define float @f11(<4 x float> %val) { -; CHECK-LABEL: f11: +define float @f12(<4 x float> %val) { +; CHECK-LABEL: f12: ; CHECK: wfisb %f0, %v24, 4, 5 ; CHECK: br %r14 %scalar = extractelement <4 x float> %val, i32 0 @@ -108,11 +118,20 @@ define float @f11(<4 x float> %val) { ret float %res } -define float @f12(<4 x float> %val) { -; CHECK-LABEL: f12: +define float @f13(<4 x float> %val) { +; CHECK-LABEL: f13: ; CHECK: wfisb %f0, %v24, 4, 1 ; CHECK: br %r14 %scalar = extractelement <4 x float> %val, i32 0 %res = call float @llvm.round.f32(float %scalar) ret float %res } + +define float @f14(<4 x float> %val) { +; CHECK-LABEL: f14: +; CHECK: wfisb %f0, %v24, 4, 4 +; CHECK: br %r14 + %scalar = extractelement <4 x float> %val, i32 0 + %res = call float @llvm.roundeven.f32(float %scalar) + ret float %res +} diff --git a/llvm/test/CodeGen/SystemZ/vec-strict-round-01.ll b/llvm/test/CodeGen/SystemZ/vec-strict-round-01.ll index b82cb8082b7b8..789f54e57ad12 100644 --- a/llvm/test/CodeGen/SystemZ/vec-strict-round-01.ll +++ b/llvm/test/CodeGen/SystemZ/vec-strict-round-01.ll @@ -8,12 +8,14 @@ declare double 
@llvm.experimental.constrained.floor.f64(double, metadata) declare double @llvm.experimental.constrained.ceil.f64(double, metadata) declare double @llvm.experimental.constrained.trunc.f64(double, metadata) declare double @llvm.experimental.constrained.round.f64(double, metadata) +declare double @llvm.experimental.constrained.roundeven.f64(double, metadata) declare <2 x double> @llvm.experimental.constrained.rint.v2f64(<2 x double>, metadata, metadata) declare <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(<2 x double>, metadata, metadata) declare <2 x double> @llvm.experimental.constrained.floor.v2f64(<2 x double>, metadata) declare <2 x double> @llvm.experimental.constrained.ceil.v2f64(<2 x double>, metadata) declare <2 x double> @llvm.experimental.constrained.trunc.v2f64(<2 x double>, metadata) declare <2 x double> @llvm.experimental.constrained.round.v2f64(<2 x double>, metadata) +declare <2 x double> @llvm.experimental.constrained.roundeven.v2f64(<2 x double>, metadata) define <2 x double> @f1(<2 x double> %val) #0 { ; CHECK-LABEL: f1: @@ -77,8 +79,18 @@ define <2 x double> @f6(<2 x double> %val) #0 { ret <2 x double> %res } -define double @f7(<2 x double> %val) #0 { +define <2 x double> @f7(<2 x double> %val) #0 { ; CHECK-LABEL: f7: +; CHECK: vfidb %v24, %v24, 4, 4 +; CHECK: br %r14 + %res = call <2 x double> @llvm.experimental.constrained.roundeven.v2f64( + <2 x double> %val, + metadata !"fpexcept.strict") #0 + ret <2 x double> %res +} + +define double @f8(<2 x double> %val) #0 { +; CHECK-LABEL: f8: ; CHECK: wfidb %f0, %v24, 0, 0 ; CHECK: br %r14 %scalar = extractelement <2 x double> %val, i32 0 @@ -89,8 +101,8 @@ define double @f7(<2 x double> %val) #0 { ret double %res } -define double @f8(<2 x double> %val) #0 { -; CHECK-LABEL: f8: +define double @f9(<2 x double> %val) #0 { +; CHECK-LABEL: f9: ; CHECK: wfidb %f0, %v24, 4, 0 ; CHECK: br %r14 %scalar = extractelement <2 x double> %val, i32 0 @@ -101,8 +113,8 @@ define double @f8(<2 x double> %val) #0 { ret double %res } -define double @f9(<2 x double> %val) #0 { -; CHECK-LABEL: f9: +define double @f10(<2 x double> %val) #0 { +; CHECK-LABEL: f10: ; CHECK: wfidb %f0, %v24, 4, 7 ; CHECK: br %r14 %scalar = extractelement <2 x double> %val, i32 0 @@ -113,8 +125,8 @@ define double @f9(<2 x double> %val) #0 { } -define double @f10(<2 x double> %val) #0 { -; CHECK-LABEL: f10: +define double @f11(<2 x double> %val) #0 { +; CHECK-LABEL: f11: ; CHECK: wfidb %f0, %v24, 4, 6 ; CHECK: br %r14 %scalar = extractelement <2 x double> %val, i32 0 @@ -124,8 +136,8 @@ define double @f10(<2 x double> %val) #0 { ret double %res } -define double @f11(<2 x double> %val) #0 { -; CHECK-LABEL: f11: +define double @f12(<2 x double> %val) #0 { +; CHECK-LABEL: f12: ; CHECK: wfidb %f0, %v24, 4, 5 ; CHECK: br %r14 %scalar = extractelement <2 x double> %val, i32 0 @@ -135,8 +147,8 @@ define double @f11(<2 x double> %val) #0 { ret double %res } -define double @f12(<2 x double> %val) #0 { -; CHECK-LABEL: f12: +define double @f13(<2 x double> %val) #0 { +; CHECK-LABEL: f13: ; CHECK: wfidb %f0, %v24, 4, 1 ; CHECK: br %r14 %scalar = extractelement <2 x double> %val, i32 0 @@ -146,4 +158,15 @@ define double @f12(<2 x double> %val) #0 { ret double %res } +define double @f14(<2 x double> %val) #0 { +; CHECK-LABEL: f14: +; CHECK: wfidb %f0, %v24, 4, 4 +; CHECK: br %r14 + %scalar = extractelement <2 x double> %val, i32 0 + %res = call double @llvm.experimental.constrained.roundeven.f64( + double %scalar, + metadata !"fpexcept.strict") #0 + ret double %res +} + 
attributes #0 = { strictfp } diff --git a/llvm/test/CodeGen/SystemZ/vec-strict-round-02.ll b/llvm/test/CodeGen/SystemZ/vec-strict-round-02.ll index 701dd5b2302f2..bb876c74f55e3 100644 --- a/llvm/test/CodeGen/SystemZ/vec-strict-round-02.ll +++ b/llvm/test/CodeGen/SystemZ/vec-strict-round-02.ll @@ -8,12 +8,14 @@ declare float @llvm.experimental.constrained.floor.f32(float, metadata) declare float @llvm.experimental.constrained.ceil.f32(float, metadata) declare float @llvm.experimental.constrained.trunc.f32(float, metadata) declare float @llvm.experimental.constrained.round.f32(float, metadata) +declare float @llvm.experimental.constrained.roundeven.f32(float, metadata) declare <4 x float> @llvm.experimental.constrained.rint.v4f32(<4 x float>, metadata, metadata) declare <4 x float> @llvm.experimental.constrained.nearbyint.v4f32(<4 x float>, metadata, metadata) declare <4 x float> @llvm.experimental.constrained.floor.v4f32(<4 x float>, metadata) declare <4 x float> @llvm.experimental.constrained.ceil.v4f32(<4 x float>, metadata) declare <4 x float> @llvm.experimental.constrained.trunc.v4f32(<4 x float>, metadata) declare <4 x float> @llvm.experimental.constrained.round.v4f32(<4 x float>, metadata) +declare <4 x float> @llvm.experimental.constrained.roundeven.v4f32(<4 x float>, metadata) define <4 x float> @f1(<4 x float> %val) #0 { ; CHECK-LABEL: f1: @@ -77,8 +79,18 @@ define <4 x float> @f6(<4 x float> %val) #0 { ret <4 x float> %res } -define float @f7(<4 x float> %val) #0 { +define <4 x float> @f7(<4 x float> %val) #0 { ; CHECK-LABEL: f7: +; CHECK: vfisb %v24, %v24, 4, 4 +; CHECK: br %r14 + %res = call <4 x float> @llvm.experimental.constrained.roundeven.v4f32( + <4 x float> %val, + metadata !"fpexcept.strict") #0 + ret <4 x float> %res +} + +define float @f8(<4 x float> %val) #0 { +; CHECK-LABEL: f8: ; CHECK: wfisb %f0, %v24, 0, 0 ; CHECK: br %r14 %scalar = extractelement <4 x float> %val, i32 0 @@ -89,8 +101,8 @@ define float @f7(<4 x float> %val) #0 { ret float %res } -define float @f8(<4 x float> %val) #0 { -; CHECK-LABEL: f8: +define float @f9(<4 x float> %val) #0 { +; CHECK-LABEL: f9: ; CHECK: wfisb %f0, %v24, 4, 0 ; CHECK: br %r14 %scalar = extractelement <4 x float> %val, i32 0 @@ -101,8 +113,8 @@ define float @f8(<4 x float> %val) #0 { ret float %res } -define float @f9(<4 x float> %val) #0 { -; CHECK-LABEL: f9: +define float @f10(<4 x float> %val) #0 { +; CHECK-LABEL: f10: ; CHECK: wfisb %f0, %v24, 4, 7 ; CHECK: br %r14 %scalar = extractelement <4 x float> %val, i32 0 @@ -112,8 +124,8 @@ define float @f9(<4 x float> %val) #0 { ret float %res } -define float @f10(<4 x float> %val) #0 { -; CHECK-LABEL: f10: +define float @f11(<4 x float> %val) #0 { +; CHECK-LABEL: f11: ; CHECK: wfisb %f0, %v24, 4, 6 ; CHECK: br %r14 %scalar = extractelement <4 x float> %val, i32 0 @@ -123,8 +135,8 @@ define float @f10(<4 x float> %val) #0 { ret float %res } -define float @f11(<4 x float> %val) #0 { -; CHECK-LABEL: f11: +define float @f12(<4 x float> %val) #0 { +; CHECK-LABEL: f12: ; CHECK: wfisb %f0, %v24, 4, 5 ; CHECK: br %r14 %scalar = extractelement <4 x float> %val, i32 0 @@ -134,8 +146,8 @@ define float @f11(<4 x float> %val) #0 { ret float %res } -define float @f12(<4 x float> %val) #0 { -; CHECK-LABEL: f12: +define float @f13(<4 x float> %val) #0 { +; CHECK-LABEL: f13: ; CHECK: wfisb %f0, %v24, 4, 1 ; CHECK: br %r14 %scalar = extractelement <4 x float> %val, i32 0 @@ -145,4 +157,15 @@ define float @f12(<4 x float> %val) #0 { ret float %res } +define float @f14(<4 x float> %val) #0 { +; 
CHECK-LABEL: f14: +; CHECK: wfisb %f0, %v24, 4, 4 +; CHECK: br %r14 + %scalar = extractelement <4 x float> %val, i32 0 + %res = call float @llvm.experimental.constrained.roundeven.f32( + float %scalar, + metadata !"fpexcept.strict") #0 + ret float %res +} + attributes #0 = { strictfp } diff --git a/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll index b08f0e5a74d56..a3e453de913fe 100644 --- a/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll +++ b/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll @@ -6050,6 +6050,178 @@ entry: ret void } +define <1 x float> @constrained_vector_roundeven_v1f32(ptr %a) #0 { +; S390X-LABEL: constrained_vector_roundeven_v1f32: +; S390X: # %bb.0: # %entry +; S390X-NEXT: stmg %r14, %r15, 112(%r15) +; S390X-NEXT: .cfi_offset %r14, -48 +; S390X-NEXT: .cfi_offset %r15, -40 +; S390X-NEXT: aghi %r15, -160 +; S390X-NEXT: .cfi_def_cfa_offset 320 +; S390X-NEXT: le %f0, 0(%r2) +; S390X-NEXT: brasl %r14, roundevenf@PLT +; S390X-NEXT: lmg %r14, %r15, 272(%r15) +; S390X-NEXT: br %r14 +; +; SZ13-LABEL: constrained_vector_roundeven_v1f32: +; SZ13: # %bb.0: # %entry +; SZ13-NEXT: lde %f0, 0(%r2) +; SZ13-NEXT: fiebra %f0, 4, %f0, 4 +; SZ13-NEXT: vlr %v24, %v0 +; SZ13-NEXT: br %r14 +entry: + %b = load <1 x float>, ptr %a + %round = call <1 x float> @llvm.experimental.constrained.roundeven.v1f32( + <1 x float> %b, + metadata !"fpexcept.strict") #0 + ret <1 x float> %round +} + +define <2 x double> @constrained_vector_roundeven_v2f64(ptr %a) #0 { +; S390X-LABEL: constrained_vector_roundeven_v2f64: +; S390X: # %bb.0: # %entry +; S390X-NEXT: stmg %r14, %r15, 112(%r15) +; S390X-NEXT: .cfi_offset %r14, -48 +; S390X-NEXT: .cfi_offset %r15, -40 +; S390X-NEXT: aghi %r15, -176 +; S390X-NEXT: .cfi_def_cfa_offset 336 +; S390X-NEXT: std %f8, 168(%r15) # 8-byte Folded Spill +; S390X-NEXT: std %f9, 160(%r15) # 8-byte Folded Spill +; S390X-NEXT: .cfi_offset %f8, -168 +; S390X-NEXT: .cfi_offset %f9, -176 +; S390X-NEXT: ld %f0, 8(%r2) +; S390X-NEXT: ld %f8, 0(%r2) +; S390X-NEXT: brasl %r14, roundeven@PLT +; S390X-NEXT: ldr %f9, %f0 +; S390X-NEXT: ldr %f0, %f8 +; S390X-NEXT: brasl %r14, roundeven@PLT +; S390X-NEXT: ldr %f2, %f9 +; S390X-NEXT: ld %f8, 168(%r15) # 8-byte Folded Reload +; S390X-NEXT: ld %f9, 160(%r15) # 8-byte Folded Reload +; S390X-NEXT: lmg %r14, %r15, 288(%r15) +; S390X-NEXT: br %r14 +; +; SZ13-LABEL: constrained_vector_roundeven_v2f64: +; SZ13: # %bb.0: # %entry +; SZ13-NEXT: vl %v0, 0(%r2), 3 +; SZ13-NEXT: vfidb %v24, %v0, 4, 4 +; SZ13-NEXT: br %r14 +entry: + %b = load <2 x double>, ptr %a + %round = call <2 x double> @llvm.experimental.constrained.roundeven.v2f64( + <2 x double> %b, + metadata !"fpexcept.strict") #0 + ret <2 x double> %round +} + +define <3 x float> @constrained_vector_roundeven_v3f32(ptr %a) #0 { +; S390X-LABEL: constrained_vector_roundeven_v3f32: +; S390X: # %bb.0: # %entry +; S390X-NEXT: stmg %r14, %r15, 112(%r15) +; S390X-NEXT: .cfi_offset %r14, -48 +; S390X-NEXT: .cfi_offset %r15, -40 +; S390X-NEXT: aghi %r15, -184 +; S390X-NEXT: .cfi_def_cfa_offset 344 +; S390X-NEXT: std %f8, 176(%r15) # 8-byte Folded Spill +; S390X-NEXT: std %f9, 168(%r15) # 8-byte Folded Spill +; S390X-NEXT: std %f10, 160(%r15) # 8-byte Folded Spill +; S390X-NEXT: .cfi_offset %f8, -168 +; S390X-NEXT: .cfi_offset %f9, -176 +; S390X-NEXT: .cfi_offset %f10, -184 +; S390X-NEXT: lg %r0, 0(%r2) +; S390X-NEXT: le %f0, 8(%r2) +; S390X-NEXT: risbg %r1, %r0, 0, 159, 0 +; S390X-NEXT: ldgr %f8, %r1 
+; S390X-NEXT: sllg %r0, %r0, 32 +; S390X-NEXT: ldgr %f9, %r0 +; S390X-NEXT: brasl %r14, roundevenf@PLT +; S390X-NEXT: ler %f10, %f0 +; S390X-NEXT: ler %f0, %f9 +; S390X-NEXT: brasl %r14, roundevenf@PLT +; S390X-NEXT: ler %f9, %f0 +; S390X-NEXT: ler %f0, %f8 +; S390X-NEXT: brasl %r14, roundevenf@PLT +; S390X-NEXT: ler %f2, %f9 +; S390X-NEXT: ler %f4, %f10 +; S390X-NEXT: ld %f8, 176(%r15) # 8-byte Folded Reload +; S390X-NEXT: ld %f9, 168(%r15) # 8-byte Folded Reload +; S390X-NEXT: ld %f10, 160(%r15) # 8-byte Folded Reload +; S390X-NEXT: lmg %r14, %r15, 296(%r15) +; S390X-NEXT: br %r14 +; +; SZ13-LABEL: constrained_vector_roundeven_v3f32: +; SZ13: # %bb.0: # %entry +; SZ13-NEXT: vl %v0, 0(%r2), 4 +; SZ13-NEXT: vrepf %v1, %v0, 2 +; SZ13-NEXT: vrepf %v2, %v0, 1 +; SZ13-NEXT: fiebra %f1, 4, %f1, 4 +; SZ13-NEXT: fiebra %f2, 4, %f2, 4 +; SZ13-NEXT: fiebra %f0, 4, %f0, 4 +; SZ13-NEXT: vmrhf %v0, %v0, %v2 +; SZ13-NEXT: vrepf %v1, %v1, 0 +; SZ13-NEXT: vmrhg %v24, %v0, %v1 +; SZ13-NEXT: br %r14 +entry: + %b = load <3 x float>, ptr %a + %round = call <3 x float> @llvm.experimental.constrained.roundeven.v3f32( + <3 x float> %b, + metadata !"fpexcept.strict") #0 + ret <3 x float> %round +} + +define void @constrained_vector_roundeven_v3f64(ptr %a) #0 { +; S390X-LABEL: constrained_vector_roundeven_v3f64: +; S390X: # %bb.0: # %entry +; S390X-NEXT: stmg %r13, %r15, 104(%r15) +; S390X-NEXT: .cfi_offset %r13, -56 +; S390X-NEXT: .cfi_offset %r14, -48 +; S390X-NEXT: .cfi_offset %r15, -40 +; S390X-NEXT: aghi %r15, -184 +; S390X-NEXT: .cfi_def_cfa_offset 344 +; S390X-NEXT: std %f8, 176(%r15) # 8-byte Folded Spill +; S390X-NEXT: std %f9, 168(%r15) # 8-byte Folded Spill +; S390X-NEXT: std %f10, 160(%r15) # 8-byte Folded Spill +; S390X-NEXT: .cfi_offset %f8, -168 +; S390X-NEXT: .cfi_offset %f9, -176 +; S390X-NEXT: .cfi_offset %f10, -184 +; S390X-NEXT: lgr %r13, %r2 +; S390X-NEXT: ld %f8, 0(%r2) +; S390X-NEXT: ld %f0, 16(%r2) +; S390X-NEXT: ld %f9, 8(%r2) +; S390X-NEXT: brasl %r14, roundeven@PLT +; S390X-NEXT: ldr %f10, %f0 +; S390X-NEXT: ldr %f0, %f9 +; S390X-NEXT: brasl %r14, roundeven@PLT +; S390X-NEXT: ldr %f9, %f0 +; S390X-NEXT: ldr %f0, %f8 +; S390X-NEXT: brasl %r14, roundeven@PLT +; S390X-NEXT: std %f0, 0(%r13) +; S390X-NEXT: std %f9, 8(%r13) +; S390X-NEXT: std %f10, 16(%r13) +; S390X-NEXT: ld %f8, 176(%r15) # 8-byte Folded Reload +; S390X-NEXT: ld %f9, 168(%r15) # 8-byte Folded Reload +; S390X-NEXT: ld %f10, 160(%r15) # 8-byte Folded Reload +; S390X-NEXT: lmg %r13, %r15, 288(%r15) +; S390X-NEXT: br %r14 +; +; SZ13-LABEL: constrained_vector_roundeven_v3f64: +; SZ13: # %bb.0: # %entry +; SZ13-NEXT: vl %v0, 0(%r2), 4 +; SZ13-NEXT: ld %f1, 16(%r2) +; SZ13-NEXT: vfidb %v0, %v0, 4, 4 +; SZ13-NEXT: fidbra %f1, 4, %f1, 4 +; SZ13-NEXT: vst %v0, 0(%r2), 4 +; SZ13-NEXT: std %f1, 16(%r2) +; SZ13-NEXT: br %r14 +entry: + %b = load <3 x double>, ptr %a + %round = call <3 x double> @llvm.experimental.constrained.roundeven.v3f64( + <3 x double> %b, + metadata !"fpexcept.strict") #0 + store <3 x double> %round, ptr %a + ret void +} + define <1 x float> @constrained_vector_trunc_v1f32(ptr %a) #0 { ; S390X-LABEL: constrained_vector_trunc_v1f32: ; S390X: # %bb.0: # %entry @@ -6230,7 +6402,7 @@ define <1 x float> @constrained_vector_tan_v1f32() #0 { ; S390X-NEXT: .cfi_offset %r15, -40 ; S390X-NEXT: aghi %r15, -160 ; S390X-NEXT: .cfi_def_cfa_offset 320 -; S390X-NEXT: larl %r1, .LCPI119_0 +; S390X-NEXT: larl %r1, .LCPI123_0 ; S390X-NEXT: le %f0, 0(%r1) ; S390X-NEXT: brasl %r14, tanf@PLT ; S390X-NEXT: lmg %r14, %r15, 272(%r15) @@ 
-6243,7 +6415,7 @@ define <1 x float> @constrained_vector_tan_v1f32() #0 { ; SZ13-NEXT: .cfi_offset %r15, -40 ; SZ13-NEXT: aghi %r15, -160 ; SZ13-NEXT: .cfi_def_cfa_offset 320 -; SZ13-NEXT: larl %r1, .LCPI119_0 +; SZ13-NEXT: larl %r1, .LCPI123_0 ; SZ13-NEXT: lde %f0, 0(%r1) ; SZ13-NEXT: brasl %r14, tanf@PLT ; SZ13-NEXT: # kill: def $f0s killed $f0s def $v0 @@ -6268,10 +6440,10 @@ define <2 x double> @constrained_vector_tan_v2f64() #0 { ; S390X-NEXT: .cfi_def_cfa_offset 328 ; S390X-NEXT: std %f8, 160(%r15) # 8-byte Folded Spill ; S390X-NEXT: .cfi_offset %f8, -168 -; S390X-NEXT: larl %r1, .LCPI120_0 +; S390X-NEXT: larl %r1, .LCPI124_0 ; S390X-NEXT: ld %f0, 0(%r1) ; S390X-NEXT: brasl %r14, tan@PLT -; S390X-NEXT: larl %r1, .LCPI120_1 +; S390X-NEXT: larl %r1, .LCPI124_1 ; S390X-NEXT: ld %f1, 0(%r1) ; S390X-NEXT: ldr %f8, %f0 ; S390X-NEXT: ldr %f0, %f1 @@ -6288,10 +6460,10 @@ define <2 x double> @constrained_vector_tan_v2f64() #0 { ; SZ13-NEXT: .cfi_offset %r15, -40 ; SZ13-NEXT: aghi %r15, -176 ; SZ13-NEXT: .cfi_def_cfa_offset 336 -; SZ13-NEXT: larl %r1, .LCPI120_0 +; SZ13-NEXT: larl %r1, .LCPI124_0 ; SZ13-NEXT: ld %f0, 0(%r1) ; SZ13-NEXT: brasl %r14, tan@PLT -; SZ13-NEXT: larl %r1, .LCPI120_1 +; SZ13-NEXT: larl %r1, .LCPI124_1 ; SZ13-NEXT: # kill: def $f0d killed $f0d def $v0 ; SZ13-NEXT: vst %v0, 160(%r15), 3 # 16-byte Folded Spill ; SZ13-NEXT: ld %f0, 0(%r1) @@ -6321,15 +6493,15 @@ define <3 x float> @constrained_vector_tan_v3f32() #0 { ; S390X-NEXT: std %f9, 160(%r15) # 8-byte Folded Spill ; S390X-NEXT: .cfi_offset %f8, -168 ; S390X-NEXT: .cfi_offset %f9, -176 -; S390X-NEXT: larl %r1, .LCPI121_0 +; S390X-NEXT: larl %r1, .LCPI125_0 ; S390X-NEXT: le %f0, 0(%r1) ; S390X-NEXT: brasl %r14, tanf@PLT -; S390X-NEXT: larl %r1, .LCPI121_1 +; S390X-NEXT: larl %r1, .LCPI125_1 ; S390X-NEXT: le %f1, 0(%r1) ; S390X-NEXT: ler %f8, %f0 ; S390X-NEXT: ler %f0, %f1 ; S390X-NEXT: brasl %r14, tanf@PLT -; S390X-NEXT: larl %r1, .LCPI121_2 +; S390X-NEXT: larl %r1, .LCPI125_2 ; S390X-NEXT: le %f1, 0(%r1) ; S390X-NEXT: ler %f9, %f0 ; S390X-NEXT: ler %f0, %f1 @@ -6348,15 +6520,15 @@ define <3 x float> @constrained_vector_tan_v3f32() #0 { ; SZ13-NEXT: .cfi_offset %r15, -40 ; SZ13-NEXT: aghi %r15, -192 ; SZ13-NEXT: .cfi_def_cfa_offset 352 -; SZ13-NEXT: larl %r1, .LCPI121_0 +; SZ13-NEXT: larl %r1, .LCPI125_0 ; SZ13-NEXT: lde %f0, 0(%r1) ; SZ13-NEXT: brasl %r14, tanf@PLT -; SZ13-NEXT: larl %r1, .LCPI121_1 +; SZ13-NEXT: larl %r1, .LCPI125_1 ; SZ13-NEXT: # kill: def $f0s killed $f0s def $v0 ; SZ13-NEXT: vst %v0, 176(%r15), 3 # 16-byte Folded Spill ; SZ13-NEXT: lde %f0, 0(%r1) ; SZ13-NEXT: brasl %r14, tanf@PLT -; SZ13-NEXT: larl %r1, .LCPI121_2 +; SZ13-NEXT: larl %r1, .LCPI125_2 ; SZ13-NEXT: # kill: def $f0s killed $f0s def $v0 ; SZ13-NEXT: vst %v0, 160(%r15), 3 # 16-byte Folded Spill ; SZ13-NEXT: lde %f0, 0(%r1) @@ -6470,20 +6642,20 @@ define <4 x double> @constrained_vector_tan_v4f64() #0 { ; S390X-NEXT: .cfi_offset %f8, -168 ; S390X-NEXT: .cfi_offset %f9, -176 ; S390X-NEXT: .cfi_offset %f10, -184 -; S390X-NEXT: larl %r1, .LCPI123_0 +; S390X-NEXT: larl %r1, .LCPI127_0 ; S390X-NEXT: ld %f0, 0(%r1) ; S390X-NEXT: brasl %r14, tan@PLT -; S390X-NEXT: larl %r1, .LCPI123_1 +; S390X-NEXT: larl %r1, .LCPI127_1 ; S390X-NEXT: ld %f1, 0(%r1) ; S390X-NEXT: ldr %f8, %f0 ; S390X-NEXT: ldr %f0, %f1 ; S390X-NEXT: brasl %r14, tan@PLT -; S390X-NEXT: larl %r1, .LCPI123_2 +; S390X-NEXT: larl %r1, .LCPI127_2 ; S390X-NEXT: ld %f1, 0(%r1) ; S390X-NEXT: ldr %f9, %f0 ; S390X-NEXT: ldr %f0, %f1 ; S390X-NEXT: brasl %r14, tan@PLT -; S390X-NEXT: larl 
%r1, .LCPI123_3 +; S390X-NEXT: larl %r1, .LCPI127_3 ; S390X-NEXT: ld %f1, 0(%r1) ; S390X-NEXT: ldr %f10, %f0 ; S390X-NEXT: ldr %f0, %f1 @@ -6504,10 +6676,10 @@ define <4 x double> @constrained_vector_tan_v4f64() #0 { ; SZ13-NEXT: .cfi_offset %r15, -40 ; SZ13-NEXT: aghi %r15, -192 ; SZ13-NEXT: .cfi_def_cfa_offset 352 -; SZ13-NEXT: larl %r1, .LCPI123_0 +; SZ13-NEXT: larl %r1, .LCPI127_0 ; SZ13-NEXT: ld %f0, 0(%r1) ; SZ13-NEXT: brasl %r14, tan@PLT -; SZ13-NEXT: larl %r1, .LCPI123_1 +; SZ13-NEXT: larl %r1, .LCPI127_1 ; SZ13-NEXT: # kill: def $f0d killed $f0d def $v0 ; SZ13-NEXT: vst %v0, 160(%r15), 3 # 16-byte Folded Spill ; SZ13-NEXT: ld %f0, 0(%r1) @@ -6515,11 +6687,11 @@ define <4 x double> @constrained_vector_tan_v4f64() #0 { ; SZ13-NEXT: vl %v1, 160(%r15), 3 # 16-byte Folded Reload ; SZ13-NEXT: # kill: def $f0d killed $f0d def $v0 ; SZ13-NEXT: vmrhg %v0, %v0, %v1 -; SZ13-NEXT: larl %r1, .LCPI123_2 +; SZ13-NEXT: larl %r1, .LCPI127_2 ; SZ13-NEXT: vst %v0, 160(%r15), 3 # 16-byte Folded Spill ; SZ13-NEXT: ld %f0, 0(%r1) ; SZ13-NEXT: brasl %r14, tan@PLT -; SZ13-NEXT: larl %r1, .LCPI123_3 +; SZ13-NEXT: larl %r1, .LCPI127_3 ; SZ13-NEXT: # kill: def $f0d killed $f0d def $v0 ; SZ13-NEXT: vst %v0, 176(%r15), 3 # 16-byte Folded Spill ; SZ13-NEXT: ld %f0, 0(%r1) @@ -6547,9 +6719,9 @@ define <1 x float> @constrained_vector_atan2_v1f32() #0 { ; S390X-NEXT: .cfi_offset %r15, -40 ; S390X-NEXT: aghi %r15, -160 ; S390X-NEXT: .cfi_def_cfa_offset 320 -; S390X-NEXT: larl %r1, .LCPI124_0 +; S390X-NEXT: larl %r1, .LCPI128_0 ; S390X-NEXT: le %f0, 0(%r1) -; S390X-NEXT: larl %r1, .LCPI124_1 +; S390X-NEXT: larl %r1, .LCPI128_1 ; S390X-NEXT: le %f2, 0(%r1) ; S390X-NEXT: brasl %r14, atan2f@PLT ; S390X-NEXT: lmg %r14, %r15, 272(%r15) @@ -6562,9 +6734,9 @@ define <1 x float> @constrained_vector_atan2_v1f32() #0 { ; SZ13-NEXT: .cfi_offset %r15, -40 ; SZ13-NEXT: aghi %r15, -160 ; SZ13-NEXT: .cfi_def_cfa_offset 320 -; SZ13-NEXT: larl %r1, .LCPI124_0 +; SZ13-NEXT: larl %r1, .LCPI128_0 ; SZ13-NEXT: lde %f0, 0(%r1) -; SZ13-NEXT: larl %r1, .LCPI124_1 +; SZ13-NEXT: larl %r1, .LCPI128_1 ; SZ13-NEXT: lde %f2, 0(%r1) ; SZ13-NEXT: brasl %r14, atan2f@PLT ; SZ13-NEXT: # kill: def $f0s killed $f0s def $v0 @@ -6590,14 +6762,14 @@ define <2 x double> @constrained_vector_atan2_v2f64() #0 { ; S390X-NEXT: .cfi_def_cfa_offset 328 ; S390X-NEXT: std %f8, 160(%r15) # 8-byte Folded Spill ; S390X-NEXT: .cfi_offset %f8, -168 -; S390X-NEXT: larl %r1, .LCPI125_0 +; S390X-NEXT: larl %r1, .LCPI129_0 ; S390X-NEXT: ld %f0, 0(%r1) -; S390X-NEXT: larl %r1, .LCPI125_1 +; S390X-NEXT: larl %r1, .LCPI129_1 ; S390X-NEXT: ld %f2, 0(%r1) ; S390X-NEXT: brasl %r14, atan2@PLT -; S390X-NEXT: larl %r1, .LCPI125_2 +; S390X-NEXT: larl %r1, .LCPI129_2 ; S390X-NEXT: ld %f1, 0(%r1) -; S390X-NEXT: larl %r1, .LCPI125_3 +; S390X-NEXT: larl %r1, .LCPI129_3 ; S390X-NEXT: ld %f2, 0(%r1) ; S390X-NEXT: ldr %f8, %f0 ; S390X-NEXT: ldr %f0, %f1 @@ -6614,16 +6786,16 @@ define <2 x double> @constrained_vector_atan2_v2f64() #0 { ; SZ13-NEXT: .cfi_offset %r15, -40 ; SZ13-NEXT: aghi %r15, -176 ; SZ13-NEXT: .cfi_def_cfa_offset 336 -; SZ13-NEXT: larl %r1, .LCPI125_0 +; SZ13-NEXT: larl %r1, .LCPI129_0 ; SZ13-NEXT: ld %f0, 0(%r1) -; SZ13-NEXT: larl %r1, .LCPI125_1 +; SZ13-NEXT: larl %r1, .LCPI129_1 ; SZ13-NEXT: ld %f2, 0(%r1) ; SZ13-NEXT: brasl %r14, atan2@PLT -; SZ13-NEXT: larl %r1, .LCPI125_2 +; SZ13-NEXT: larl %r1, .LCPI129_2 ; SZ13-NEXT: # kill: def $f0d killed $f0d def $v0 ; SZ13-NEXT: vst %v0, 160(%r15), 3 # 16-byte Folded Spill ; SZ13-NEXT: ld %f0, 0(%r1) -; SZ13-NEXT: larl %r1, 
.LCPI125_3 +; SZ13-NEXT: larl %r1, .LCPI129_3 ; SZ13-NEXT: ld %f2, 0(%r1) ; SZ13-NEXT: brasl %r14, atan2@PLT ; SZ13-NEXT: vl %v1, 160(%r15), 3 # 16-byte Folded Reload @@ -6652,21 +6824,21 @@ define <3 x float> @constrained_vector_atan2_v3f32() #0 { ; S390X-NEXT: std %f9, 160(%r15) # 8-byte Folded Spill ; S390X-NEXT: .cfi_offset %f8, -168 ; S390X-NEXT: .cfi_offset %f9, -176 -; S390X-NEXT: larl %r1, .LCPI126_0 +; S390X-NEXT: larl %r1, .LCPI130_0 ; S390X-NEXT: le %f0, 0(%r1) -; S390X-NEXT: larl %r1, .LCPI126_1 +; S390X-NEXT: larl %r1, .LCPI130_1 ; S390X-NEXT: le %f2, 0(%r1) ; S390X-NEXT: brasl %r14, atan2f@PLT -; S390X-NEXT: larl %r1, .LCPI126_2 +; S390X-NEXT: larl %r1, .LCPI130_2 ; S390X-NEXT: le %f1, 0(%r1) -; S390X-NEXT: larl %r1, .LCPI126_3 +; S390X-NEXT: larl %r1, .LCPI130_3 ; S390X-NEXT: le %f2, 0(%r1) ; S390X-NEXT: ler %f8, %f0 ; S390X-NEXT: ler %f0, %f1 ; S390X-NEXT: brasl %r14, atan2f@PLT -; S390X-NEXT: larl %r1, .LCPI126_4 +; S390X-NEXT: larl %r1, .LCPI130_4 ; S390X-NEXT: le %f1, 0(%r1) -; S390X-NEXT: larl %r1, .LCPI126_5 +; S390X-NEXT: larl %r1, .LCPI130_5 ; S390X-NEXT: le %f2, 0(%r1) ; S390X-NEXT: ler %f9, %f0 ; S390X-NEXT: ler %f0, %f1 @@ -6685,23 +6857,23 @@ define <3 x float> @constrained_vector_atan2_v3f32() #0 { ; SZ13-NEXT: .cfi_offset %r15, -40 ; SZ13-NEXT: aghi %r15, -192 ; SZ13-NEXT: .cfi_def_cfa_offset 352 -; SZ13-NEXT: larl %r1, .LCPI126_0 +; SZ13-NEXT: larl %r1, .LCPI130_0 ; SZ13-NEXT: lde %f0, 0(%r1) -; SZ13-NEXT: larl %r1, .LCPI126_1 +; SZ13-NEXT: larl %r1, .LCPI130_1 ; SZ13-NEXT: lde %f2, 0(%r1) ; SZ13-NEXT: brasl %r14, atan2f@PLT -; SZ13-NEXT: larl %r1, .LCPI126_2 +; SZ13-NEXT: larl %r1, .LCPI130_2 ; SZ13-NEXT: # kill: def $f0s killed $f0s def $v0 ; SZ13-NEXT: vst %v0, 176(%r15), 3 # 16-byte Folded Spill ; SZ13-NEXT: lde %f0, 0(%r1) -; SZ13-NEXT: larl %r1, .LCPI126_3 +; SZ13-NEXT: larl %r1, .LCPI130_3 ; SZ13-NEXT: lde %f2, 0(%r1) ; SZ13-NEXT: brasl %r14, atan2f@PLT -; SZ13-NEXT: larl %r1, .LCPI126_4 +; SZ13-NEXT: larl %r1, .LCPI130_4 ; SZ13-NEXT: # kill: def $f0s killed $f0s def $v0 ; SZ13-NEXT: vst %v0, 160(%r15), 3 # 16-byte Folded Spill ; SZ13-NEXT: lde %f0, 0(%r1) -; SZ13-NEXT: larl %r1, .LCPI126_5 +; SZ13-NEXT: larl %r1, .LCPI130_5 ; SZ13-NEXT: lde %f2, 0(%r1) ; SZ13-NEXT: brasl %r14, atan2f@PLT ; SZ13-NEXT: vl %v1, 160(%r15), 3 # 16-byte Folded Reload @@ -6838,28 +7010,28 @@ define <4 x double> @constrained_vector_atan2_v4f64() #0 { ; S390X-NEXT: .cfi_offset %f8, -168 ; S390X-NEXT: .cfi_offset %f9, -176 ; S390X-NEXT: .cfi_offset %f10, -184 -; S390X-NEXT: larl %r1, .LCPI128_0 +; S390X-NEXT: larl %r1, .LCPI132_0 ; S390X-NEXT: ld %f0, 0(%r1) -; S390X-NEXT: larl %r1, .LCPI128_1 +; S390X-NEXT: larl %r1, .LCPI132_1 ; S390X-NEXT: ld %f2, 0(%r1) ; S390X-NEXT: brasl %r14, atan2@PLT -; S390X-NEXT: larl %r1, .LCPI128_2 +; S390X-NEXT: larl %r1, .LCPI132_2 ; S390X-NEXT: ld %f1, 0(%r1) -; S390X-NEXT: larl %r1, .LCPI128_3 +; S390X-NEXT: larl %r1, .LCPI132_3 ; S390X-NEXT: ld %f2, 0(%r1) ; S390X-NEXT: ldr %f8, %f0 ; S390X-NEXT: ldr %f0, %f1 ; S390X-NEXT: brasl %r14, atan2@PLT -; S390X-NEXT: larl %r1, .LCPI128_4 +; S390X-NEXT: larl %r1, .LCPI132_4 ; S390X-NEXT: ld %f1, 0(%r1) -; S390X-NEXT: larl %r1, .LCPI128_5 +; S390X-NEXT: larl %r1, .LCPI132_5 ; S390X-NEXT: ld %f2, 0(%r1) ; S390X-NEXT: ldr %f9, %f0 ; S390X-NEXT: ldr %f0, %f1 ; S390X-NEXT: brasl %r14, atan2@PLT -; S390X-NEXT: larl %r1, .LCPI128_6 +; S390X-NEXT: larl %r1, .LCPI132_6 ; S390X-NEXT: ld %f1, 0(%r1) -; S390X-NEXT: larl %r1, .LCPI128_7 +; S390X-NEXT: larl %r1, .LCPI132_7 ; S390X-NEXT: ld %f2, 0(%r1) ; S390X-NEXT: 
ldr %f10, %f0 ; S390X-NEXT: ldr %f0, %f1 @@ -6880,32 +7052,32 @@ define <4 x double> @constrained_vector_atan2_v4f64() #0 { ; SZ13-NEXT: .cfi_offset %r15, -40 ; SZ13-NEXT: aghi %r15, -192 ; SZ13-NEXT: .cfi_def_cfa_offset 352 -; SZ13-NEXT: larl %r1, .LCPI128_0 +; SZ13-NEXT: larl %r1, .LCPI132_0 ; SZ13-NEXT: ld %f0, 0(%r1) -; SZ13-NEXT: larl %r1, .LCPI128_1 +; SZ13-NEXT: larl %r1, .LCPI132_1 ; SZ13-NEXT: ld %f2, 0(%r1) ; SZ13-NEXT: brasl %r14, atan2@PLT -; SZ13-NEXT: larl %r1, .LCPI128_2 +; SZ13-NEXT: larl %r1, .LCPI132_2 ; SZ13-NEXT: # kill: def $f0d killed $f0d def $v0 ; SZ13-NEXT: vst %v0, 160(%r15), 3 # 16-byte Folded Spill ; SZ13-NEXT: ld %f0, 0(%r1) -; SZ13-NEXT: larl %r1, .LCPI128_3 +; SZ13-NEXT: larl %r1, .LCPI132_3 ; SZ13-NEXT: ld %f2, 0(%r1) ; SZ13-NEXT: brasl %r14, atan2@PLT ; SZ13-NEXT: vl %v1, 160(%r15), 3 # 16-byte Folded Reload ; SZ13-NEXT: # kill: def $f0d killed $f0d def $v0 ; SZ13-NEXT: vmrhg %v0, %v0, %v1 -; SZ13-NEXT: larl %r1, .LCPI128_4 +; SZ13-NEXT: larl %r1, .LCPI132_4 ; SZ13-NEXT: vst %v0, 160(%r15), 3 # 16-byte Folded Spill ; SZ13-NEXT: ld %f0, 0(%r1) -; SZ13-NEXT: larl %r1, .LCPI128_5 +; SZ13-NEXT: larl %r1, .LCPI132_5 ; SZ13-NEXT: ld %f2, 0(%r1) ; SZ13-NEXT: brasl %r14, atan2@PLT -; SZ13-NEXT: larl %r1, .LCPI128_6 +; SZ13-NEXT: larl %r1, .LCPI132_6 ; SZ13-NEXT: # kill: def $f0d killed $f0d def $v0 ; SZ13-NEXT: vst %v0, 176(%r15), 3 # 16-byte Folded Spill ; SZ13-NEXT: ld %f0, 0(%r1) -; SZ13-NEXT: larl %r1, .LCPI128_7 +; SZ13-NEXT: larl %r1, .LCPI132_7 ; SZ13-NEXT: ld %f2, 0(%r1) ; SZ13-NEXT: brasl %r14, atan2@PLT ; SZ13-NEXT: vl %v1, 176(%r15), 3 # 16-byte Folded Reload @@ -6953,6 +7125,7 @@ declare <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f32(<2 x float declare <2 x double> @llvm.experimental.constrained.ceil.v2f64(<2 x double>, metadata) declare <2 x double> @llvm.experimental.constrained.floor.v2f64(<2 x double>, metadata) declare <2 x double> @llvm.experimental.constrained.round.v2f64(<2 x double>, metadata) +declare <2 x double> @llvm.experimental.constrained.roundeven.v2f64(<2 x double>, metadata) declare <2 x double> @llvm.experimental.constrained.trunc.v2f64(<2 x double>, metadata) declare <1 x float> @llvm.experimental.constrained.fadd.v1f32(<1 x float>, <1 x float>, metadata, metadata) @@ -6981,6 +7154,7 @@ declare <1 x double> @llvm.experimental.constrained.fpext.v1f64.v1f32(<1 x float declare <1 x float> @llvm.experimental.constrained.ceil.v1f32(<1 x float>, metadata) declare <1 x float> @llvm.experimental.constrained.floor.v1f32(<1 x float>, metadata) declare <1 x float> @llvm.experimental.constrained.round.v1f32(<1 x float>, metadata) +declare <1 x float> @llvm.experimental.constrained.roundeven.v1f32(<1 x float>, metadata) declare <1 x float> @llvm.experimental.constrained.trunc.v1f32(<1 x float>, metadata) declare <3 x float> @llvm.experimental.constrained.fadd.v3f32(<3 x float>, <3 x float>, metadata, metadata) @@ -7033,6 +7207,8 @@ declare <3 x float> @llvm.experimental.constrained.floor.v3f32(<3 x float>, meta declare <3 x double> @llvm.experimental.constrained.floor.v3f64(<3 x double>, metadata) declare <3 x float> @llvm.experimental.constrained.round.v3f32(<3 x float>, metadata) declare <3 x double> @llvm.experimental.constrained.round.v3f64(<3 x double>, metadata) +declare <3 x float> @llvm.experimental.constrained.roundeven.v3f32(<3 x float>, metadata) +declare <3 x double> @llvm.experimental.constrained.roundeven.v3f64(<3 x double>, metadata) declare <3 x float> @llvm.experimental.constrained.trunc.v3f32(<3 x float>, 
metadata) declare <3 x double> @llvm.experimental.constrained.trunc.v3f64(<3 x double>, metadata) @@ -7062,4 +7238,5 @@ declare <4 x double> @llvm.experimental.constrained.fpext.v4f64.v4f32(<4 x float declare <4 x double> @llvm.experimental.constrained.ceil.v4f64(<4 x double>, metadata) declare <4 x double> @llvm.experimental.constrained.floor.v4f64(<4 x double>, metadata) declare <4 x double> @llvm.experimental.constrained.round.v4f64(<4 x double>, metadata) +declare <4 x double> @llvm.experimental.constrained.roundeven.v4f64(<4 x double>, metadata) declare <4 x double> @llvm.experimental.constrained.trunc.v4f64(<4 x double>, metadata)
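
For reference, a minimal C sketch of the new lowering (assuming clang with -march=z13 -mzvector or later; the function name and compile flags here are illustrative, not part of the commit):

    #include <vecintrin.h>

    /* With this change, clang emits @llvm.roundeven.v2f64 for vec_round
       instead of the platform intrinsic @llvm.s390.vfidb, and the SystemZ
       backend selects a single "vfidb %v24, %v24, 4, 4" (round to nearest,
       ties to even), as checked by the updated tests above. */
    vector double round_ties_to_even(vector double vd) {
      return vec_round(vd);
    }

The test lines that now pass 3 as the rounding mode keep exercising the remaining platform-intrinsic path: mode 3 (round to prepare for shorter precision) has no target-independent LLVM intrinsic, so __builtin_s390_vfidb(vd, 4, 3) still lowers to @llvm.s390.vfidb.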