Author: Farzon Lotfi
Date: 2024-06-10T20:46:26-04:00
New Revision: 189d4711915f4ce89b373f3cbcfe1f19c73becd9
URL: https://github.com/llvm/llvm-project/commit/189d4711915f4ce89b373f3cbcfe1f19c73becd9
DIFF: https://github.com/llvm/llvm-project/commit/189d4711915f4ce89b373f3cbcfe1f19c73becd9.diff

LOG: [clang] Reland Add tanf16 builtin and support for tan constrained intrinsic (#94559)

Relanding this PR now that https://github.com/llvm/llvm-project/pull/90503 has merged. With `FTAN` landing in [TargetLoweringBase.cpp:L1021](https://github.com/llvm/llvm-project/blob/main/llvm/lib/CodeGen/TargetLoweringBase.cpp#L1020C23-L1021C63), there is now an Expand case for the `llvm.tan` intrinsic (f32/f64/f128) in all LLVM backends.

In LLVM, the `llvm.experimental.constrained.cos` and `llvm.experimental.constrained.sin` intrinsics perform cosine and sine calculations under additional constraints on floating-point operations (rounding mode and exception behavior). Every floating-point math intrinsic is expected to have such a constrained variant. This change adds one for the `tan` intrinsic:

- `Builtins.td` - replace `TanF128` with `F16F128MathTemplate`
- `CGBuiltin.cpp` - map the existing tan builtins to the `tan` and `constrained_tan` intrinsics
- `ConstrainedOps.def` - map `tan` and `constrained_tan` to an ISD opcode

Resolves #91421

---------

Co-authored-by: Farzon Lotfi <far...@farzon.com>

Added: 


Modified: 
    clang/include/clang/Basic/Builtins.td
    clang/lib/CodeGen/CGBuiltin.cpp
    clang/test/CodeGen/X86/math-builtins.c
    clang/test/CodeGen/constrained-math-builtins.c
    clang/test/CodeGen/math-libcalls.c
    clang/test/CodeGenOpenCL/builtins-f16.cl
    llvm/docs/LangRef.rst
    llvm/include/llvm/IR/ConstrainedOps.def
    llvm/include/llvm/IR/Intrinsics.td
    llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
    llvm/test/Assembler/fp-intrinsics-attr.ll
    llvm/test/CodeGen/AArch64/fp-intrinsics-fp16.ll
    llvm/test/CodeGen/AArch64/fp-intrinsics.ll
    llvm/test/CodeGen/ARM/fp-intrinsics.ll
    llvm/test/CodeGen/PowerPC/ctrloop-constrained-fp.ll
    llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll
    llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
    llvm/test/CodeGen/RISCV/double-intrinsics-strict.ll
    llvm/test/CodeGen/RISCV/float-intrinsics-strict.ll
    llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll
    llvm/test/CodeGen/X86/fp-intrinsics.ll
    llvm/test/CodeGen/X86/fp-strict-libcalls-msvc32.ll
    llvm/test/CodeGen/X86/fp128-libcalls-strict.ll
    llvm/test/CodeGen/X86/fp80-strict-libcalls.ll
    llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
    llvm/test/Feature/fp-intrinsics.ll

Removed: 


################################################################################

diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td index 11982af3fa609..7bef5fd7ad40f 100644 --- a/clang/include/clang/Basic/Builtins.td +++ b/clang/include/clang/Basic/Builtins.td @@ -482,11 +482,11 @@ def SqrtF16F128 : Builtin, F16F128MathTemplate { let Prototype = "T(T)"; } -def TanF128 : Builtin { - let Spellings = ["__builtin_tanf128"]; +def TanF16F128 : Builtin, F16F128MathTemplate { + let Spellings = ["__builtin_tan"]; let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions]; - let Prototype = "__float128(__float128)"; + let Prototype = "T(T)"; } def TanhF128 : Builtin { diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index c16b69ba87567..06e201fa71e6f 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -2923,6 +2923,18 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, SetSqrtFPAccuracy(Call); return RValue::get(Call); } + + case Builtin::BItan: + case
Builtin::BItanf: + case Builtin::BItanl: + case Builtin::BI__builtin_tan: + case Builtin::BI__builtin_tanf: + case Builtin::BI__builtin_tanf16: + case Builtin::BI__builtin_tanl: + case Builtin::BI__builtin_tanf128: + return RValue::get(emitUnaryMaybeConstrainedFPBuiltin( + *this, E, Intrinsic::tan, Intrinsic::experimental_constrained_tan)); + case Builtin::BItrunc: case Builtin::BItruncf: case Builtin::BItruncl: diff --git a/clang/test/CodeGen/X86/math-builtins.c b/clang/test/CodeGen/X86/math-builtins.c index 093239b448260..1e0f129b98610 100644 --- a/clang/test/CodeGen/X86/math-builtins.c +++ b/clang/test/CodeGen/X86/math-builtins.c @@ -674,10 +674,10 @@ __builtin_sqrt(f); __builtin_sqrtf(f); __builtin_sqrtl(f); __builtin_ __builtin_tan(f); __builtin_tanf(f); __builtin_tanl(f); __builtin_tanf128(f); -// NO__ERRNO: declare double @tan(double noundef) [[READNONE]] -// NO__ERRNO: declare float @tanf(float noundef) [[READNONE]] -// NO__ERRNO: declare x86_fp80 @tanl(x86_fp80 noundef) [[READNONE]] -// NO__ERRNO: declare fp128 @tanf128(fp128 noundef) [[READNONE]] +// NO__ERRNO: declare double @llvm.tan.f64(double) [[READNONE_INTRINSIC]] +// NO__ERRNO: declare float @llvm.tan.f32(float) [[READNONE_INTRINSIC]] +// NO__ERRNO: declare x86_fp80 @llvm.tan.f80(x86_fp80) [[READNONE_INTRINSIC]] +// NO__ERRNO: declare fp128 @llvm.tan.f128(fp128) [[READNONE_INTRINSIC]] // HAS_ERRNO: declare double @tan(double noundef) [[NOT_READNONE]] // HAS_ERRNO: declare float @tanf(float noundef) [[NOT_READNONE]] // HAS_ERRNO: declare x86_fp80 @tanl(x86_fp80 noundef) [[NOT_READNONE]] diff --git a/clang/test/CodeGen/constrained-math-builtins.c b/clang/test/CodeGen/constrained-math-builtins.c index 2de832dd2b6ca..6cc3a10a1e794 100644 --- a/clang/test/CodeGen/constrained-math-builtins.c +++ b/clang/test/CodeGen/constrained-math-builtins.c @@ -183,6 +183,14 @@ void foo(double *d, float f, float *fp, long double *l, int *i, const char *c, _ // CHECK: call x86_fp80 @llvm.experimental.constrained.sqrt.f80(x86_fp80 %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict") // CHECK: call fp128 @llvm.experimental.constrained.sqrt.f128(fp128 %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict") + __builtin_tan(f); __builtin_tanf(f); __builtin_tanl(f); __builtin_tanf128(f); + +// CHECK: call double @llvm.experimental.constrained.tan.f64(double %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict") +// CHECK: call float @llvm.experimental.constrained.tan.f32(float %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict") +// CHECK: call x86_fp80 @llvm.experimental.constrained.tan.f80(x86_fp80 %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict") +// CHECK: call fp128 @llvm.experimental.constrained.tan.f128(fp128 %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict") + + __builtin_trunc(f); __builtin_truncf(f); __builtin_truncl(f); __builtin_truncf128(f); // CHECK: call double @llvm.experimental.constrained.trunc.f64(double %{{.*}}, metadata !"fpexcept.strict") @@ -315,6 +323,11 @@ void foo(double *d, float f, float *fp, long double *l, int *i, const char *c, _ // CHECK: declare x86_fp80 @llvm.experimental.constrained.sqrt.f80(x86_fp80, metadata, metadata) // CHECK: declare fp128 @llvm.experimental.constrained.sqrt.f128(fp128, metadata, metadata) +// CHECK: declare double @llvm.experimental.constrained.tan.f64(double, metadata, metadata) +// CHECK: declare float @llvm.experimental.constrained.tan.f32(float, metadata, metadata) +// CHECK: declare x86_fp80 
@llvm.experimental.constrained.tan.f80(x86_fp80, metadata, metadata) +// CHECK: declare fp128 @llvm.experimental.constrained.tan.f128(fp128, metadata, metadata) + // CHECK: declare double @llvm.experimental.constrained.trunc.f64(double, metadata) // CHECK: declare float @llvm.experimental.constrained.trunc.f32(float, metadata) // CHECK: declare x86_fp80 @llvm.experimental.constrained.trunc.f80(x86_fp80, metadata) diff --git a/clang/test/CodeGen/math-libcalls.c b/clang/test/CodeGen/math-libcalls.c index 29c312ba0ecac..a249182692762 100644 --- a/clang/test/CodeGen/math-libcalls.c +++ b/clang/test/CodeGen/math-libcalls.c @@ -662,15 +662,15 @@ void foo(double *d, float f, float *fp, long double *l, int *i, const char *c) { tan(f); tanf(f); tanl(f); -// NO__ERRNO: declare double @tan(double noundef) [[READNONE]] -// NO__ERRNO: declare float @tanf(float noundef) [[READNONE]] -// NO__ERRNO: declare x86_fp80 @tanl(x86_fp80 noundef) [[READNONE]] +// NO__ERRNO: declare double @llvm.tan.f64(double) [[READNONE_INTRINSIC]] +// NO__ERRNO: declare float @llvm.tan.f32(float) [[READNONE_INTRINSIC]] +// NO__ERRNO: declare x86_fp80 @llvm.tan.f80(x86_fp80) [[READNONE_INTRINSIC]] // HAS_ERRNO: declare double @tan(double noundef) [[NOT_READNONE]] // HAS_ERRNO: declare float @tanf(float noundef) [[NOT_READNONE]] // HAS_ERRNO: declare x86_fp80 @tanl(x86_fp80 noundef) [[NOT_READNONE]] -// HAS_MAYTRAP: declare double @tan(double noundef) [[NOT_READNONE]] -// HAS_MAYTRAP: declare float @tanf(float noundef) [[NOT_READNONE]] -// HAS_MAYTRAP: declare x86_fp80 @tanl(x86_fp80 noundef) [[NOT_READNONE]] +// HAS_MAYTRAP: declare double @llvm.experimental.constrained.tan.f64( +// HAS_MAYTRAP: declare float @llvm.experimental.constrained.tan.f32( +// HAS_MAYTRAP: declare x86_fp80 @llvm.experimental.constrained.tan.f80( tanh(f); tanhf(f); tanhl(f); diff --git a/clang/test/CodeGenOpenCL/builtins-f16.cl b/clang/test/CodeGenOpenCL/builtins-f16.cl index adf7cdde154f5..d7bffdad5c548 100644 --- a/clang/test/CodeGenOpenCL/builtins-f16.cl +++ b/clang/test/CodeGenOpenCL/builtins-f16.cl @@ -66,6 +66,9 @@ void test_half_builtins(half h0, half h1, half h2, int i0) { // CHECK: call half @llvm.sqrt.f16(half %h0) res = __builtin_sqrtf16(h0); + // CHECK: call half @llvm.tan.f16(half %h0) + res = __builtin_tanf16(h0); + // CHECK: call half @llvm.trunc.f16(half %h0) res = __builtin_truncf16(h0); diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 9fb2c048a5c86..c11a6627d81d3 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -26244,6 +26244,42 @@ same values as the libm ``cos`` functions would, and handles error conditions in the same way. +'``llvm.experimental.constrained.tan``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare <type> + @llvm.experimental.constrained.tan(<type> <op1>, + metadata <rounding mode>, + metadata <exception behavior>) + +Overview: +""""""""" + +The '``llvm.experimental.constrained.tan``' intrinsic returns the tangent of the +first operand. + +Arguments: +"""""""""" + +The first argument and the return type are floating-point numbers of the same +type. + +The second and third arguments specify the rounding mode and exception +behavior as described above. + +Semantics: +"""""""""" + +This function returns the tangent of the specified operand, returning the +same values as the libm ``tan`` functions would, and handles error +conditions in the same way. 
+ + '``llvm.experimental.constrained.exp``' Intrinsic ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/llvm/include/llvm/IR/ConstrainedOps.def b/llvm/include/llvm/IR/ConstrainedOps.def index 41aa44de957f9..a7b37c5cb204d 100644 --- a/llvm/include/llvm/IR/ConstrainedOps.def +++ b/llvm/include/llvm/IR/ConstrainedOps.def @@ -95,6 +95,7 @@ DAG_FUNCTION(round, 1, 0, experimental_constrained_round, FROUND) DAG_FUNCTION(roundeven, 1, 0, experimental_constrained_roundeven, FROUNDEVEN) DAG_FUNCTION(sin, 1, 1, experimental_constrained_sin, FSIN) DAG_FUNCTION(sqrt, 1, 1, experimental_constrained_sqrt, FSQRT) +DAG_FUNCTION(tan, 1, 1, experimental_constrained_tan, FTAN) DAG_FUNCTION(trunc, 1, 0, experimental_constrained_trunc, FTRUNC) // This is definition for fmuladd intrinsic function, that is converted into diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index 107442623ab7b..4c506a6ace23e 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1218,6 +1218,10 @@ let IntrProperties = [IntrInaccessibleMemOnly, IntrWillReturn, IntrStrictFP] in [ LLVMMatchType<0>, llvm_metadata_ty, llvm_metadata_ty ]>; + def int_experimental_constrained_tan : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0>, + llvm_metadata_ty, + llvm_metadata_ty ]>; def int_experimental_constrained_pow : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], [ LLVMMatchType<0>, LLVMMatchType<0>, diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 87d737d7ffe61..c4f819f5fcdd2 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -728,14 +728,14 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::FCOPYSIGN, MVT::bf16, Promote); } - for (auto Op : {ISD::FREM, ISD::FPOW, ISD::FPOWI, - ISD::FCOS, ISD::FSIN, ISD::FSINCOS, - ISD::FTAN, ISD::FEXP, ISD::FEXP2, - ISD::FEXP10, ISD::FLOG, ISD::FLOG2, - ISD::FLOG10, ISD::STRICT_FREM, ISD::STRICT_FPOW, - ISD::STRICT_FPOWI, ISD::STRICT_FCOS, ISD::STRICT_FSIN, - ISD::STRICT_FEXP, ISD::STRICT_FEXP2, ISD::STRICT_FLOG, - ISD::STRICT_FLOG2, ISD::STRICT_FLOG10}) { + for (auto Op : {ISD::FREM, ISD::FPOW, ISD::FPOWI, + ISD::FCOS, ISD::FSIN, ISD::FSINCOS, + ISD::FTAN, ISD::FEXP, ISD::FEXP2, + ISD::FEXP10, ISD::FLOG, ISD::FLOG2, + ISD::FLOG10, ISD::STRICT_FREM, ISD::STRICT_FPOW, + ISD::STRICT_FPOWI, ISD::STRICT_FCOS, ISD::STRICT_FSIN, + ISD::STRICT_FEXP, ISD::STRICT_FEXP2, ISD::STRICT_FLOG, + ISD::STRICT_FLOG2, ISD::STRICT_FLOG10, ISD::STRICT_FTAN}) { setOperationAction(Op, MVT::f16, Promote); setOperationAction(Op, MVT::v4f16, Expand); setOperationAction(Op, MVT::v8f16, Expand); diff --git a/llvm/test/Assembler/fp-intrinsics-attr.ll b/llvm/test/Assembler/fp-intrinsics-attr.ll index 6546d1a275c99..613630e1a2b4d 100644 --- a/llvm/test/Assembler/fp-intrinsics-attr.ll +++ b/llvm/test/Assembler/fp-intrinsics-attr.ll @@ -85,6 +85,11 @@ define void @func(double %a, double %b, double %c, i32 %i) strictfp { metadata !"round.dynamic", metadata !"fpexcept.strict") + %tan = call double @llvm.experimental.constrained.tan.f64( + double %a, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %pow = call double @llvm.experimental.constrained.pow.f64( double %a, double %b, metadata !"round.dynamic", @@ -244,6 +249,9 @@ declare double @llvm.experimental.constrained.sin.f64(double, metadata, metadata declare double 
@llvm.experimental.constrained.cos.f64(double, metadata, metadata) ; CHECK: @llvm.experimental.constrained.cos.f64({{.*}}) #[[ATTR1]] +declare double @llvm.experimental.constrained.tan.f64(double, metadata, metadata) +; CHECK: @llvm.experimental.constrained.tan.f64({{.*}}) #[[ATTR1]] + declare double @llvm.experimental.constrained.pow.f64(double, double, metadata, metadata) ; CHECK: @llvm.experimental.constrained.pow.f64({{.*}}) #[[ATTR1]] diff --git a/llvm/test/CodeGen/AArch64/fp-intrinsics-fp16.ll b/llvm/test/CodeGen/AArch64/fp-intrinsics-fp16.ll index 48062c9a54b5d..b09ed8d3eb764 100644 --- a/llvm/test/CodeGen/AArch64/fp-intrinsics-fp16.ll +++ b/llvm/test/CodeGen/AArch64/fp-intrinsics-fp16.ll @@ -338,6 +338,21 @@ define half @cos_f16(half %x) #0 { ret half %val } +define half @tan_f16(half %x) #0 { +; CHECK-LABEL: tan_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: bl tanf +; CHECK-NEXT: fcvt h0, s0 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %val = call half @llvm.experimental.constrained.tan.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + define half @pow_f16(half %x, half %y) #0 { ; CHECK-LABEL: pow_f16: ; CHECK: // %bb.0: @@ -1147,6 +1162,7 @@ declare half @llvm.experimental.constrained.sqrt.f16(half, metadata, metadata) declare half @llvm.experimental.constrained.powi.f16(half, i32, metadata, metadata) declare half @llvm.experimental.constrained.sin.f16(half, metadata, metadata) declare half @llvm.experimental.constrained.cos.f16(half, metadata, metadata) +declare half @llvm.experimental.constrained.tan.f16(half, metadata, metadata) declare half @llvm.experimental.constrained.pow.f16(half, half, metadata, metadata) declare half @llvm.experimental.constrained.log.f16(half, metadata, metadata) declare half @llvm.experimental.constrained.log10.f16(half, metadata, metadata) diff --git a/llvm/test/CodeGen/AArch64/fp-intrinsics.ll b/llvm/test/CodeGen/AArch64/fp-intrinsics.ll index 685efbb7cad43..67d0b63f4076f 100644 --- a/llvm/test/CodeGen/AArch64/fp-intrinsics.ll +++ b/llvm/test/CodeGen/AArch64/fp-intrinsics.ll @@ -146,6 +146,13 @@ define float @cos_f32(float %x) #0 { ret float %val } +; CHECK-LABEL: tan_f32: +; CHECK: bl tanf +define float @tan_f32(float %x) #0 { + %val = call float @llvm.experimental.constrained.tan.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret float %val +} + ; CHECK-LABEL: pow_f32: ; CHECK: bl powf define float @pow_f32(float %x, float %y) #0 { @@ -630,6 +637,13 @@ define double @cos_f64(double %x) #0 { ret double %val } +; CHECK-LABEL: tan_f64: +; CHECK: bl tan +define double @tan_f64(double %x) #0 { + %val = call double @llvm.experimental.constrained.tan.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret double %val +} + ; CHECK-LABEL: pow_f64: ; CHECK: bl pow define double @pow_f64(double %x, double %y) #0 { @@ -1114,6 +1128,13 @@ define fp128 @cos_f128(fp128 %x) #0 { ret fp128 %val } +; CHECK-LABEL: tan_f128: +; CHECK: bl tanl +define fp128 @tan_f128(fp128 %x) #0 { + %val = call fp128 @llvm.experimental.constrained.tan.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret fp128 %val +} + ; CHECK-LABEL: pow_f128: ; CHECK: bl powl define fp128 @pow_f128(fp128 %x, fp128 %y) #0 { @@ -1491,6 +1512,13 @@ define <1 x double> @cos_v1f64(<1 x 
double> %x, <1 x double> %y) #0 { ret <1 x double> %val } +; CHECK-LABEL: tan_v1f64: +; CHECK: bl tan +define <1 x double> @tan_v1f64(<1 x double> %x, <1 x double> %y) #0 { + %val = call <1 x double> @llvm.experimental.constrained.tan.v1f64(<1 x double> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret <1 x double> %val +} + ; CHECK-LABEL: pow_v1f64: ; CHECK: bl pow define <1 x double> @pow_v1f64(<1 x double> %x, <1 x double> %y) #0 { @@ -1555,6 +1583,7 @@ declare float @llvm.experimental.constrained.sqrt.f32(float, metadata, metadata) declare float @llvm.experimental.constrained.powi.f32(float, i32, metadata, metadata) declare float @llvm.experimental.constrained.sin.f32(float, metadata, metadata) declare float @llvm.experimental.constrained.cos.f32(float, metadata, metadata) +declare float @llvm.experimental.constrained.tan.f32(float, metadata, metadata) declare float @llvm.experimental.constrained.pow.f32(float, float, metadata, metadata) declare float @llvm.experimental.constrained.log.f32(float, metadata, metadata) declare float @llvm.experimental.constrained.log10.f32(float, metadata, metadata) @@ -1599,6 +1628,7 @@ declare double @llvm.experimental.constrained.sqrt.f64(double, metadata, metadat declare double @llvm.experimental.constrained.powi.f64(double, i32, metadata, metadata) declare double @llvm.experimental.constrained.sin.f64(double, metadata, metadata) declare double @llvm.experimental.constrained.cos.f64(double, metadata, metadata) +declare double @llvm.experimental.constrained.tan.f64(double, metadata, metadata) declare double @llvm.experimental.constrained.pow.f64(double, double, metadata, metadata) declare double @llvm.experimental.constrained.log.f64(double, metadata, metadata) declare double @llvm.experimental.constrained.log10.f64(double, metadata, metadata) @@ -1643,6 +1673,7 @@ declare fp128 @llvm.experimental.constrained.sqrt.f128(fp128, metadata, metadata declare fp128 @llvm.experimental.constrained.powi.f128(fp128, i32, metadata, metadata) declare fp128 @llvm.experimental.constrained.sin.f128(fp128, metadata, metadata) declare fp128 @llvm.experimental.constrained.cos.f128(fp128, metadata, metadata) +declare fp128 @llvm.experimental.constrained.tan.f128(fp128, metadata, metadata) declare fp128 @llvm.experimental.constrained.pow.f128(fp128, fp128, metadata, metadata) declare fp128 @llvm.experimental.constrained.log.f128(fp128, metadata, metadata) declare fp128 @llvm.experimental.constrained.log10.f128(fp128, metadata, metadata) diff --git a/llvm/test/CodeGen/ARM/fp-intrinsics.ll b/llvm/test/CodeGen/ARM/fp-intrinsics.ll index 64b22a5cc71bc..e286eb3226e46 100644 --- a/llvm/test/CodeGen/ARM/fp-intrinsics.ll +++ b/llvm/test/CodeGen/ARM/fp-intrinsics.ll @@ -139,6 +139,13 @@ define float @cos_f32(float %x) #0 { ret float %val } +; CHECK-LABEL: tan_f32: +; CHECK: bl tanf +define float @tan_f32(float %x) #0 { + %val = call float @llvm.experimental.constrained.tan.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret float %val +} + ; CHECK-LABEL: pow_f32: ; CHECK: bl powf define float @pow_f32(float %x, float %y) #0 { @@ -596,6 +603,13 @@ define double @cos_f64(double %x) #0 { ret double %val } +; CHECK-LABEL: tan_f64: +; CHECK: bl tan +define double @tan_f64(double %x) #0 { + %val = call double @llvm.experimental.constrained.tan.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret double %val +} + ; CHECK-LABEL: pow_f64: ; CHECK: bl pow define double @pow_f64(double %x, double %y) #0 { @@ 
-1023,6 +1037,7 @@ declare float @llvm.experimental.constrained.sqrt.f32(float, metadata, metadata) declare float @llvm.experimental.constrained.powi.f32(float, i32, metadata, metadata) declare float @llvm.experimental.constrained.sin.f32(float, metadata, metadata) declare float @llvm.experimental.constrained.cos.f32(float, metadata, metadata) +declare float @llvm.experimental.constrained.tan.f32(float, metadata, metadata) declare float @llvm.experimental.constrained.pow.f32(float, float, metadata, metadata) declare float @llvm.experimental.constrained.log.f32(float, metadata, metadata) declare float @llvm.experimental.constrained.log10.f32(float, metadata, metadata) @@ -1056,6 +1071,7 @@ declare double @llvm.experimental.constrained.sqrt.f64(double, metadata, metadat declare double @llvm.experimental.constrained.powi.f64(double, i32, metadata, metadata) declare double @llvm.experimental.constrained.sin.f64(double, metadata, metadata) declare double @llvm.experimental.constrained.cos.f64(double, metadata, metadata) +declare double @llvm.experimental.constrained.tan.f64(double, metadata, metadata) declare double @llvm.experimental.constrained.pow.f64(double, double, metadata, metadata) declare double @llvm.experimental.constrained.log.f64(double, metadata, metadata) declare double @llvm.experimental.constrained.log10.f64(double, metadata, metadata) diff --git a/llvm/test/CodeGen/PowerPC/ctrloop-constrained-fp.ll b/llvm/test/CodeGen/PowerPC/ctrloop-constrained-fp.ll index 50ebe0471dcea..402ecb763d5b3 100644 --- a/llvm/test/CodeGen/PowerPC/ctrloop-constrained-fp.ll +++ b/llvm/test/CodeGen/PowerPC/ctrloop-constrained-fp.ll @@ -83,5 +83,55 @@ exit: ret void } +; Check constrained ops converted to call +define void @testTan(ptr %cast) strictfp { +; CHECK-LABEL: testTan: +; CHECK: # %bb.0: # %root +; CHECK-NEXT: mflr 0 +; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: .cfi_offset r29, -24 +; CHECK-NEXT: .cfi_offset r30, -16 +; CHECK-NEXT: std 29, -24(1) # 8-byte Folded Spill +; CHECK-NEXT: std 30, -16(1) # 8-byte Folded Spill +; CHECK-NEXT: stdu 1, -64(1) +; CHECK-NEXT: addi 30, 3, -8 +; CHECK-NEXT: li 29, 255 +; CHECK-NEXT: std 0, 80(1) +; CHECK-NEXT: .p2align 5 +; CHECK-NEXT: .LBB2_1: # %for.body +; CHECK-NEXT: # +; CHECK-NEXT: lfdu 1, 8(30) +; CHECK-NEXT: bl tan +; CHECK-NEXT: nop +; CHECK-NEXT: addi 29, 29, -1 +; CHECK-NEXT: stfd 1, 0(30) +; CHECK-NEXT: cmpldi 29, 0 +; CHECK-NEXT: bc 12, 1, .LBB2_1 +; CHECK-NEXT: # %bb.2: # %exit +; CHECK-NEXT: addi 1, 1, 64 +; CHECK-NEXT: ld 0, 16(1) +; CHECK-NEXT: ld 30, -16(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 29, -24(1) # 8-byte Folded Reload +; CHECK-NEXT: mtlr 0 +; CHECK-NEXT: blr +root: + br label %for.body + +exit: + ret void + +for.body: + %i = phi i64 [ 0, %root ], [ %next, %for.body ] + %idx = getelementptr inbounds double, ptr %cast, i64 %i + %val = load double, ptr %idx + %tan = tail call nnan ninf nsz arcp double @llvm.experimental.constrained.tan.f64(double %val, metadata !"round.dynamic", metadata !"fpexcept.strict") + store double %tan, ptr %idx, align 8 + %next = add nuw nsw i64 %i, 1 + %cond = icmp eq i64 %next, 255 + br i1 %cond, label %exit, label %for.body +} + declare double @llvm.experimental.constrained.cos.f64(double, metadata, metadata) +declare double @llvm.experimental.constrained.tan.f64(double, metadata, metadata) declare double @llvm.experimental.constrained.sqrt.f64(double, metadata, metadata) diff --git a/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll 
b/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll index 42972fe069df6..76f3dea5b7751 100644 --- a/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll +++ b/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll @@ -2066,6 +2066,50 @@ entry: ret i1 %conv } +define ppc_fp128 @test_tan_ppc_fp128(ppc_fp128 %first) #0 { +; PC64LE-LABEL: test_tan_ppc_fp128: +; PC64LE: # %bb.0: # %entry +; PC64LE-NEXT: mflr 0 +; PC64LE-NEXT: stdu 1, -32(1) +; PC64LE-NEXT: std 0, 48(1) +; PC64LE-NEXT: bl tanl +; PC64LE-NEXT: nop +; PC64LE-NEXT: addi 1, 1, 32 +; PC64LE-NEXT: ld 0, 16(1) +; PC64LE-NEXT: mtlr 0 +; PC64LE-NEXT: blr +; +; PC64LE9-LABEL: test_tan_ppc_fp128: +; PC64LE9: # %bb.0: # %entry +; PC64LE9-NEXT: mflr 0 +; PC64LE9-NEXT: stdu 1, -32(1) +; PC64LE9-NEXT: std 0, 48(1) +; PC64LE9-NEXT: bl tanl +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: addi 1, 1, 32 +; PC64LE9-NEXT: ld 0, 16(1) +; PC64LE9-NEXT: mtlr 0 +; PC64LE9-NEXT: blr +; +; PC64-LABEL: test_tan_ppc_fp128: +; PC64: # %bb.0: # %entry +; PC64-NEXT: mflr 0 +; PC64-NEXT: stdu 1, -112(1) +; PC64-NEXT: std 0, 128(1) +; PC64-NEXT: bl tanl +; PC64-NEXT: nop +; PC64-NEXT: addi 1, 1, 112 +; PC64-NEXT: ld 0, 16(1) +; PC64-NEXT: mtlr 0 +; PC64-NEXT: blr +entry: + %tan = call ppc_fp128 @llvm.experimental.constrained.tan.ppcf128( + ppc_fp128 %first, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #1 + ret ppc_fp128 %tan +} + attributes #0 = { nounwind strictfp } attributes #1 = { strictfp } @@ -2096,6 +2140,7 @@ declare ppc_fp128 @llvm.experimental.constrained.round.ppcf128(ppc_fp128, metada declare ppc_fp128 @llvm.experimental.constrained.sin.ppcf128(ppc_fp128, metadata, metadata) declare ppc_fp128 @llvm.experimental.constrained.sqrt.ppcf128(ppc_fp128, metadata, metadata) declare ppc_fp128 @llvm.experimental.constrained.fsub.ppcf128(ppc_fp128, ppc_fp128, metadata, metadata) +declare ppc_fp128 @llvm.experimental.constrained.tan.ppcf128(ppc_fp128, metadata, metadata) declare ppc_fp128 @llvm.experimental.constrained.trunc.ppcf128(ppc_fp128, metadata) declare i64 @llvm.experimental.constrained.fptosi.i64.ppcf128(ppc_fp128, metadata) declare i32 @llvm.experimental.constrained.fptosi.i32.ppcf128(ppc_fp128, metadata) diff --git a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll index 9cabe0c17d849..f217162782bfd 100644 --- a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll +++ b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll @@ -8302,6 +8302,357 @@ entry: ret <4 x float> %result } +define <1 x float> @constrained_vector_tan_v1f32(<1 x float> %x) #0 { +; PC64LE-LABEL: constrained_vector_tan_v1f32: +; PC64LE: # %bb.0: # %entry +; PC64LE-NEXT: mflr 0 +; PC64LE-NEXT: stdu 1, -32(1) +; PC64LE-NEXT: std 0, 48(1) +; PC64LE-NEXT: bl tanf +; PC64LE-NEXT: nop +; PC64LE-NEXT: addi 1, 1, 32 +; PC64LE-NEXT: ld 0, 16(1) +; PC64LE-NEXT: mtlr 0 +; PC64LE-NEXT: blr +; +; PC64LE9-LABEL: constrained_vector_tan_v1f32: +; PC64LE9: # %bb.0: # %entry +; PC64LE9-NEXT: mflr 0 +; PC64LE9-NEXT: stdu 1, -32(1) +; PC64LE9-NEXT: std 0, 48(1) +; PC64LE9-NEXT: bl tanf +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: addi 1, 1, 32 +; PC64LE9-NEXT: ld 0, 16(1) +; PC64LE9-NEXT: mtlr 0 +; PC64LE9-NEXT: blr +entry: + %tan = call <1 x float> @llvm.experimental.constrained.tan.v1f32( + <1 x float> %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #1 + ret <1 x float> %tan +} + +define <2 x double> @constrained_vector_tan_v2f64(<2 x double> %x) #0 { +; 
PC64LE-LABEL: constrained_vector_tan_v2f64: +; PC64LE: # %bb.0: # %entry +; PC64LE-NEXT: mflr 0 +; PC64LE-NEXT: stdu 1, -80(1) +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: std 0, 96(1) +; PC64LE-NEXT: stxvd2x 62, 1, 3 # 16-byte Folded Spill +; PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill +; PC64LE-NEXT: vmr 31, 2 +; PC64LE-NEXT: xxlor 1, 63, 63 +; PC64LE-NEXT: bl tan +; PC64LE-NEXT: nop +; PC64LE-NEXT: xxlor 62, 1, 1 +; PC64LE-NEXT: xxswapd 1, 63 +; PC64LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; PC64LE-NEXT: bl tan +; PC64LE-NEXT: nop +; PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE-NEXT: xxmrghd 34, 62, 1 +; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: lxvd2x 62, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: addi 1, 1, 80 +; PC64LE-NEXT: ld 0, 16(1) +; PC64LE-NEXT: mtlr 0 +; PC64LE-NEXT: blr +; +; PC64LE9-LABEL: constrained_vector_tan_v2f64: +; PC64LE9: # %bb.0: # %entry +; PC64LE9-NEXT: mflr 0 +; PC64LE9-NEXT: stdu 1, -64(1) +; PC64LE9-NEXT: std 0, 80(1) +; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill +; PC64LE9-NEXT: vmr 31, 2 +; PC64LE9-NEXT: xscpsgndp 1, 63, 63 +; PC64LE9-NEXT: stxv 62, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: bl tan +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: xscpsgndp 62, 1, 1 +; PC64LE9-NEXT: xxswapd 1, 63 +; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; PC64LE9-NEXT: bl tan +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE9-NEXT: xxmrghd 34, 62, 1 +; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload +; PC64LE9-NEXT: lxv 62, 32(1) # 16-byte Folded Reload +; PC64LE9-NEXT: addi 1, 1, 64 +; PC64LE9-NEXT: ld 0, 16(1) +; PC64LE9-NEXT: mtlr 0 +; PC64LE9-NEXT: blr +entry: + %tan = call <2 x double> @llvm.experimental.constrained.tan.v2f64( + <2 x double> %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #1 + ret <2 x double> %tan +} + +define <3 x float> @constrained_vector_tan_v3f32(<3 x float> %x) #0 { +; PC64LE-LABEL: constrained_vector_tan_v3f32: +; PC64LE: # %bb.0: # %entry +; PC64LE-NEXT: mflr 0 +; PC64LE-NEXT: stdu 1, -80(1) +; PC64LE-NEXT: xxsldwi 0, 34, 34, 1 +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: std 0, 96(1) +; PC64LE-NEXT: stfd 30, 64(1) # 8-byte Folded Spill +; PC64LE-NEXT: stfd 31, 72(1) # 8-byte Folded Spill +; PC64LE-NEXT: xscvspdpn 1, 0 +; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill +; PC64LE-NEXT: vmr 31, 2 +; PC64LE-NEXT: bl tanf +; PC64LE-NEXT: nop +; PC64LE-NEXT: xxswapd 0, 63 +; PC64LE-NEXT: fmr 31, 1 +; PC64LE-NEXT: xscvspdpn 1, 0 +; PC64LE-NEXT: bl tanf +; PC64LE-NEXT: nop +; PC64LE-NEXT: xxsldwi 0, 63, 63, 3 +; PC64LE-NEXT: fmr 30, 1 +; PC64LE-NEXT: xscvspdpn 1, 0 +; PC64LE-NEXT: bl tanf +; PC64LE-NEXT: nop +; PC64LE-NEXT: xscvdpspn 0, 1 +; PC64LE-NEXT: xscvdpspn 1, 30 +; PC64LE-NEXT: addis 3, 2, .LCPI189_0@toc@ha +; PC64LE-NEXT: lfd 30, 64(1) # 8-byte Folded Reload +; PC64LE-NEXT: xscvdpspn 36, 31 +; PC64LE-NEXT: lfd 31, 72(1) # 8-byte Folded Reload +; PC64LE-NEXT: addi 3, 3, .LCPI189_0@toc@l +; PC64LE-NEXT: xxmrghw 34, 1, 0 +; PC64LE-NEXT: lxvd2x 0, 0, 3 +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: xxswapd 35, 0 +; PC64LE-NEXT: vperm 2, 4, 2, 3 +; PC64LE-NEXT: addi 1, 1, 80 +; PC64LE-NEXT: ld 0, 16(1) +; PC64LE-NEXT: mtlr 0 +; PC64LE-NEXT: blr +; +; PC64LE9-LABEL: constrained_vector_tan_v3f32: +; PC64LE9: # %bb.0: # %entry +; PC64LE9-NEXT: mflr 0 +; PC64LE9-NEXT: stdu 1, -64(1) +; PC64LE9-NEXT: 
xxsldwi 0, 34, 34, 1 +; PC64LE9-NEXT: std 0, 80(1) +; PC64LE9-NEXT: stfd 30, 48(1) # 8-byte Folded Spill +; PC64LE9-NEXT: stxv 63, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: stfd 31, 56(1) # 8-byte Folded Spill +; PC64LE9-NEXT: vmr 31, 2 +; PC64LE9-NEXT: xscvspdpn 1, 0 +; PC64LE9-NEXT: bl tanf +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: xxswapd 0, 63 +; PC64LE9-NEXT: fmr 31, 1 +; PC64LE9-NEXT: xscvspdpn 1, 0 +; PC64LE9-NEXT: bl tanf +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: xxsldwi 0, 63, 63, 3 +; PC64LE9-NEXT: fmr 30, 1 +; PC64LE9-NEXT: xscvspdpn 1, 0 +; PC64LE9-NEXT: bl tanf +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: xscvdpspn 0, 1 +; PC64LE9-NEXT: xscvdpspn 1, 30 +; PC64LE9-NEXT: addis 3, 2, .LCPI189_0@toc@ha +; PC64LE9-NEXT: xscvdpspn 34, 31 +; PC64LE9-NEXT: lxv 63, 32(1) # 16-byte Folded Reload +; PC64LE9-NEXT: lfd 31, 56(1) # 8-byte Folded Reload +; PC64LE9-NEXT: addi 3, 3, .LCPI189_0@toc@l +; PC64LE9-NEXT: lfd 30, 48(1) # 8-byte Folded Reload +; PC64LE9-NEXT: xxmrghw 35, 1, 0 +; PC64LE9-NEXT: lxv 0, 0(3) +; PC64LE9-NEXT: xxperm 34, 35, 0 +; PC64LE9-NEXT: addi 1, 1, 64 +; PC64LE9-NEXT: ld 0, 16(1) +; PC64LE9-NEXT: mtlr 0 +; PC64LE9-NEXT: blr +entry: + %tan = call <3 x float> @llvm.experimental.constrained.tan.v3f32( + <3 x float> %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #1 + ret <3 x float> %tan +} + +define <3 x double> @constrained_vector_tan_v3f64(<3 x double> %x) #0 { +; PC64LE-LABEL: constrained_vector_tan_v3f64: +; PC64LE: # %bb.0: # %entry +; PC64LE-NEXT: mflr 0 +; PC64LE-NEXT: stdu 1, -80(1) +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: std 0, 96(1) +; PC64LE-NEXT: stfd 30, 64(1) # 8-byte Folded Spill +; PC64LE-NEXT: fmr 30, 2 +; PC64LE-NEXT: stfd 31, 72(1) # 8-byte Folded Spill +; PC64LE-NEXT: fmr 31, 3 +; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill +; PC64LE-NEXT: bl tan +; PC64LE-NEXT: nop +; PC64LE-NEXT: xxlor 63, 1, 1 +; PC64LE-NEXT: fmr 1, 30 +; PC64LE-NEXT: bl tan +; PC64LE-NEXT: nop +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE-NEXT: xxmrghd 63, 1, 63 +; PC64LE-NEXT: fmr 1, 31 +; PC64LE-NEXT: bl tan +; PC64LE-NEXT: nop +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: fmr 3, 1 +; PC64LE-NEXT: xxswapd 1, 63 +; PC64LE-NEXT: lfd 31, 72(1) # 8-byte Folded Reload +; PC64LE-NEXT: xxlor 2, 63, 63 +; PC64LE-NEXT: lfd 30, 64(1) # 8-byte Folded Reload +; PC64LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: addi 1, 1, 80 +; PC64LE-NEXT: ld 0, 16(1) +; PC64LE-NEXT: mtlr 0 +; PC64LE-NEXT: blr +; +; PC64LE9-LABEL: constrained_vector_tan_v3f64: +; PC64LE9: # %bb.0: # %entry +; PC64LE9-NEXT: mflr 0 +; PC64LE9-NEXT: stdu 1, -64(1) +; PC64LE9-NEXT: std 0, 80(1) +; PC64LE9-NEXT: stfd 30, 48(1) # 8-byte Folded Spill +; PC64LE9-NEXT: stxv 63, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: stfd 31, 56(1) # 8-byte Folded Spill +; PC64LE9-NEXT: fmr 31, 3 +; PC64LE9-NEXT: fmr 30, 2 +; PC64LE9-NEXT: bl tan +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: xscpsgndp 63, 1, 1 +; PC64LE9-NEXT: fmr 1, 30 +; PC64LE9-NEXT: bl tan +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE9-NEXT: xxmrghd 63, 1, 63 +; PC64LE9-NEXT: fmr 1, 31 +; PC64LE9-NEXT: bl tan +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: fmr 3, 1 +; PC64LE9-NEXT: xxswapd 1, 63 +; PC64LE9-NEXT: xscpsgndp 2, 63, 63 +; PC64LE9-NEXT: lxv 63, 32(1) # 16-byte Folded Reload +; PC64LE9-NEXT: lfd 31, 56(1) # 8-byte Folded Reload +; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; PC64LE9-NEXT: lfd 30, 48(1) # 8-byte Folded Reload +; 
PC64LE9-NEXT: addi 1, 1, 64 +; PC64LE9-NEXT: ld 0, 16(1) +; PC64LE9-NEXT: mtlr 0 +; PC64LE9-NEXT: blr +entry: + %tan = call <3 x double> @llvm.experimental.constrained.tan.v3f64( + <3 x double> %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #1 + ret <3 x double> %tan +} + +define <4 x double> @constrained_vector_tan_v4f64(<4 x double> %x) #0 { +; PC64LE-LABEL: constrained_vector_tan_v4f64: +; PC64LE: # %bb.0: # %entry +; PC64LE-NEXT: mflr 0 +; PC64LE-NEXT: stdu 1, -96(1) +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: std 0, 112(1) +; PC64LE-NEXT: stxvd2x 61, 1, 3 # 16-byte Folded Spill +; PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: stxvd2x 62, 1, 3 # 16-byte Folded Spill +; PC64LE-NEXT: vmr 30, 2 +; PC64LE-NEXT: li 3, 80 +; PC64LE-NEXT: xxlor 1, 62, 62 +; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill +; PC64LE-NEXT: vmr 31, 3 +; PC64LE-NEXT: bl tan +; PC64LE-NEXT: nop +; PC64LE-NEXT: xxlor 61, 1, 1 +; PC64LE-NEXT: xxswapd 1, 62 +; PC64LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; PC64LE-NEXT: bl tan +; PC64LE-NEXT: nop +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE-NEXT: xxmrghd 62, 61, 1 +; PC64LE-NEXT: xxlor 1, 63, 63 +; PC64LE-NEXT: bl tan +; PC64LE-NEXT: nop +; PC64LE-NEXT: xxlor 61, 1, 1 +; PC64LE-NEXT: xxswapd 1, 63 +; PC64LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; PC64LE-NEXT: bl tan +; PC64LE-NEXT: nop +; PC64LE-NEXT: li 3, 80 +; PC64LE-NEXT: vmr 2, 30 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE-NEXT: xxmrghd 35, 61, 1 +; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: lxvd2x 62, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: lxvd2x 61, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: addi 1, 1, 96 +; PC64LE-NEXT: ld 0, 16(1) +; PC64LE-NEXT: mtlr 0 +; PC64LE-NEXT: blr +; +; PC64LE9-LABEL: constrained_vector_tan_v4f64: +; PC64LE9: # %bb.0: # %entry +; PC64LE9-NEXT: mflr 0 +; PC64LE9-NEXT: stdu 1, -80(1) +; PC64LE9-NEXT: std 0, 96(1) +; PC64LE9-NEXT: stxv 62, 48(1) # 16-byte Folded Spill +; PC64LE9-NEXT: vmr 30, 2 +; PC64LE9-NEXT: xscpsgndp 1, 62, 62 +; PC64LE9-NEXT: stxv 61, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: stxv 63, 64(1) # 16-byte Folded Spill +; PC64LE9-NEXT: vmr 31, 3 +; PC64LE9-NEXT: bl tan +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: xscpsgndp 61, 1, 1 +; PC64LE9-NEXT: xxswapd 1, 62 +; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; PC64LE9-NEXT: bl tan +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE9-NEXT: xxmrghd 62, 61, 1 +; PC64LE9-NEXT: xscpsgndp 1, 63, 63 +; PC64LE9-NEXT: bl tan +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: xscpsgndp 61, 1, 1 +; PC64LE9-NEXT: xxswapd 1, 63 +; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; PC64LE9-NEXT: bl tan +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE9-NEXT: xxmrghd 35, 61, 1 +; PC64LE9-NEXT: vmr 2, 30 +; PC64LE9-NEXT: lxv 63, 64(1) # 16-byte Folded Reload +; PC64LE9-NEXT: lxv 62, 48(1) # 16-byte Folded Reload +; PC64LE9-NEXT: lxv 61, 32(1) # 16-byte Folded Reload +; PC64LE9-NEXT: addi 1, 1, 80 +; PC64LE9-NEXT: ld 0, 16(1) +; PC64LE9-NEXT: mtlr 0 +; PC64LE9-NEXT: blr +entry: + %tan = call <4 x double> @llvm.experimental.constrained.tan.v4f64( + <4 x double> %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #1 + ret <4 x double> %tan +} + attributes #0 = { nounwind strictfp noimplicitfloat } attributes #1 = { strictfp } @@ -8316,6 +8667,7 @@ declare <2 x double> @llvm.experimental.constrained.pow.v2f64(<2 x 
double>, <2 x declare <2 x double> @llvm.experimental.constrained.powi.v2f64(<2 x double>, i32, metadata, metadata) declare <2 x double> @llvm.experimental.constrained.sin.v2f64(<2 x double>, metadata, metadata) declare <2 x double> @llvm.experimental.constrained.cos.v2f64(<2 x double>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.tan.v2f64(<2 x double>, metadata, metadata) declare <2 x double> @llvm.experimental.constrained.exp.v2f64(<2 x double>, metadata, metadata) declare <2 x double> @llvm.experimental.constrained.exp2.v2f64(<2 x double>, metadata, metadata) declare <2 x double> @llvm.experimental.constrained.log.v2f64(<2 x double>, metadata, metadata) @@ -8361,6 +8713,7 @@ declare <1 x float> @llvm.experimental.constrained.pow.v1f32(<1 x float>, <1 x f declare <1 x float> @llvm.experimental.constrained.powi.v1f32(<1 x float>, i32, metadata, metadata) declare <1 x float> @llvm.experimental.constrained.sin.v1f32(<1 x float>, metadata, metadata) declare <1 x float> @llvm.experimental.constrained.cos.v1f32(<1 x float>, metadata, metadata) +declare <1 x float> @llvm.experimental.constrained.tan.v1f32(<1 x float>, metadata, metadata) declare <1 x float> @llvm.experimental.constrained.exp.v1f32(<1 x float>, metadata, metadata) declare <1 x float> @llvm.experimental.constrained.exp2.v1f32(<1 x float>, metadata, metadata) declare <1 x float> @llvm.experimental.constrained.log.v1f32(<1 x float>, metadata, metadata) @@ -8414,6 +8767,8 @@ declare <3 x float> @llvm.experimental.constrained.sin.v3f32(<3 x float>, metada declare <3 x double> @llvm.experimental.constrained.sin.v3f64(<3 x double>, metadata, metadata) declare <3 x float> @llvm.experimental.constrained.cos.v3f32(<3 x float>, metadata, metadata) declare <3 x double> @llvm.experimental.constrained.cos.v3f64(<3 x double>, metadata, metadata) +declare <3 x float> @llvm.experimental.constrained.tan.v3f32(<3 x float>, metadata, metadata) +declare <3 x double> @llvm.experimental.constrained.tan.v3f64(<3 x double>, metadata, metadata) declare <3 x float> @llvm.experimental.constrained.exp.v3f32(<3 x float>, metadata, metadata) declare <3 x double> @llvm.experimental.constrained.exp.v3f64(<3 x double>, metadata, metadata) declare <3 x float> @llvm.experimental.constrained.exp2.v3f32(<3 x float>, metadata, metadata) @@ -8470,6 +8825,7 @@ declare <4 x double> @llvm.experimental.constrained.pow.v4f64(<4 x double>, <4 x declare <4 x double> @llvm.experimental.constrained.powi.v4f64(<4 x double>, i32, metadata, metadata) declare <4 x double> @llvm.experimental.constrained.sin.v4f64(<4 x double>, metadata, metadata) declare <4 x double> @llvm.experimental.constrained.cos.v4f64(<4 x double>, metadata, metadata) +declare <4 x double> @llvm.experimental.constrained.tan.v4f64(<4 x double>, metadata, metadata) declare <4 x double> @llvm.experimental.constrained.exp.v4f64(<4 x double>, metadata, metadata) declare <4 x double> @llvm.experimental.constrained.exp2.v4f64(<4 x double>, metadata, metadata) declare <4 x double> @llvm.experimental.constrained.log.v4f64(<4 x double>, metadata, metadata) diff --git a/llvm/test/CodeGen/RISCV/double-intrinsics-strict.ll b/llvm/test/CodeGen/RISCV/double-intrinsics-strict.ll index 38215860193ea..4cb6191e7322e 100644 --- a/llvm/test/CodeGen/RISCV/double-intrinsics-strict.ll +++ b/llvm/test/CodeGen/RISCV/double-intrinsics-strict.ll @@ -375,6 +375,66 @@ define double @sincos_f64(double %a) nounwind strictfp { ret double %3 } +declare double @llvm.experimental.constrained.tan.f64(double, 
metadata, metadata) + +define double @tan_f64(double %a) nounwind strictfp { +; RV32IFD-LABEL: tan_f64: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: call tan +; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: tan_f64: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: addi sp, sp, -16 +; RV64IFD-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IFD-NEXT: call tan +; RV64IFD-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IFD-NEXT: addi sp, sp, 16 +; RV64IFD-NEXT: ret +; +; RV32IZFINXZDINX-LABEL: tan_f64: +; RV32IZFINXZDINX: # %bb.0: +; RV32IZFINXZDINX-NEXT: addi sp, sp, -16 +; RV32IZFINXZDINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFINXZDINX-NEXT: call tan +; RV32IZFINXZDINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFINXZDINX-NEXT: addi sp, sp, 16 +; RV32IZFINXZDINX-NEXT: ret +; +; RV64IZFINXZDINX-LABEL: tan_f64: +; RV64IZFINXZDINX: # %bb.0: +; RV64IZFINXZDINX-NEXT: addi sp, sp, -16 +; RV64IZFINXZDINX-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFINXZDINX-NEXT: call tan +; RV64IZFINXZDINX-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IZFINXZDINX-NEXT: addi sp, sp, 16 +; RV64IZFINXZDINX-NEXT: ret +; +; RV32I-LABEL: tan_f64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call tan +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: tan_f64: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call tan +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call double @llvm.experimental.constrained.tan.f64(double %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret double %1 +} + declare double @llvm.experimental.constrained.pow.f64(double, double, metadata, metadata) define double @pow_f64(double %a, double %b) nounwind strictfp { diff --git a/llvm/test/CodeGen/RISCV/float-intrinsics-strict.ll b/llvm/test/CodeGen/RISCV/float-intrinsics-strict.ll index 626db1985bfc7..e4be5074cb800 100644 --- a/llvm/test/CodeGen/RISCV/float-intrinsics-strict.ll +++ b/llvm/test/CodeGen/RISCV/float-intrinsics-strict.ll @@ -354,6 +354,66 @@ define float @sincos_f32(float %a) nounwind strictfp { ret float %3 } +declare float @llvm.experimental.constrained.tan.f32(float, metadata, metadata) + +define float @tan_f32(float %a) nounwind strictfp { +; RV32IF-LABEL: tan_f32: +; RV32IF: # %bb.0: +; RV32IF-NEXT: addi sp, sp, -16 +; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IF-NEXT: call tanf +; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: tan_f32: +; RV64IF: # %bb.0: +; RV64IF-NEXT: addi sp, sp, -16 +; RV64IF-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IF-NEXT: call tanf +; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IF-NEXT: addi sp, sp, 16 +; RV64IF-NEXT: ret +; +; RV32IZFINX-LABEL: tan_f32: +; RV32IZFINX: # %bb.0: +; RV32IZFINX-NEXT: addi sp, sp, -16 +; RV32IZFINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFINX-NEXT: call tanf +; RV32IZFINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFINX-NEXT: addi sp, sp, 16 +; RV32IZFINX-NEXT: ret +; +; RV64IZFINX-LABEL: tan_f32: +; RV64IZFINX: # %bb.0: +; RV64IZFINX-NEXT: addi sp, sp, -16 +; RV64IZFINX-NEXT: sd ra, 8(sp) 
# 8-byte Folded Spill +; RV64IZFINX-NEXT: call tanf +; RV64IZFINX-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IZFINX-NEXT: addi sp, sp, 16 +; RV64IZFINX-NEXT: ret +; +; RV32I-LABEL: tan_f32: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call tanf +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: tan_f32: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call tanf +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call float @llvm.experimental.constrained.tan.f32(float %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret float %1 +} + declare float @llvm.experimental.constrained.pow.f32(float, float, metadata, metadata) define float @pow_f32(float %a, float %b) nounwind strictfp { diff --git a/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll index 9d77744f18ca1..4a109ee96a3d3 100644 --- a/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll +++ b/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll @@ -6222,6 +6222,323 @@ entry: ret void } +define <1 x float> @constrained_vector_tan_v1f32() #0 { +; S390X-LABEL: constrained_vector_tan_v1f32: +; S390X: # %bb.0: # %entry +; S390X-NEXT: stmg %r14, %r15, 112(%r15) +; S390X-NEXT: .cfi_offset %r14, -48 +; S390X-NEXT: .cfi_offset %r15, -40 +; S390X-NEXT: aghi %r15, -160 +; S390X-NEXT: .cfi_def_cfa_offset 320 +; S390X-NEXT: larl %r1, .LCPI119_0 +; S390X-NEXT: le %f0, 0(%r1) +; S390X-NEXT: brasl %r14, tanf@PLT +; S390X-NEXT: lmg %r14, %r15, 272(%r15) +; S390X-NEXT: br %r14 +; +; SZ13-LABEL: constrained_vector_tan_v1f32: +; SZ13: # %bb.0: # %entry +; SZ13-NEXT: stmg %r14, %r15, 112(%r15) +; SZ13-NEXT: .cfi_offset %r14, -48 +; SZ13-NEXT: .cfi_offset %r15, -40 +; SZ13-NEXT: aghi %r15, -160 +; SZ13-NEXT: .cfi_def_cfa_offset 320 +; SZ13-NEXT: larl %r1, .LCPI119_0 +; SZ13-NEXT: lde %f0, 0(%r1) +; SZ13-NEXT: brasl %r14, tanf@PLT +; SZ13-NEXT: # kill: def $f0s killed $f0s def $v0 +; SZ13-NEXT: vlr %v24, %v0 +; SZ13-NEXT: lmg %r14, %r15, 272(%r15) +; SZ13-NEXT: br %r14 +entry: + %tan = call <1 x float> @llvm.experimental.constrained.tan.v1f32( + <1 x float> <float 42.0>, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <1 x float> %tan +} + +define <2 x double> @constrained_vector_tan_v2f64() #0 { +; S390X-LABEL: constrained_vector_tan_v2f64: +; S390X: # %bb.0: # %entry +; S390X-NEXT: stmg %r14, %r15, 112(%r15) +; S390X-NEXT: .cfi_offset %r14, -48 +; S390X-NEXT: .cfi_offset %r15, -40 +; S390X-NEXT: aghi %r15, -168 +; S390X-NEXT: .cfi_def_cfa_offset 328 +; S390X-NEXT: std %f8, 160(%r15) # 8-byte Folded Spill +; S390X-NEXT: .cfi_offset %f8, -168 +; S390X-NEXT: larl %r1, .LCPI120_0 +; S390X-NEXT: ld %f0, 0(%r1) +; S390X-NEXT: brasl %r14, tan@PLT +; S390X-NEXT: larl %r1, .LCPI120_1 +; S390X-NEXT: ld %f1, 0(%r1) +; S390X-NEXT: ldr %f8, %f0 +; S390X-NEXT: ldr %f0, %f1 +; S390X-NEXT: brasl %r14, tan@PLT +; S390X-NEXT: ldr %f2, %f8 +; S390X-NEXT: ld %f8, 160(%r15) # 8-byte Folded Reload +; S390X-NEXT: lmg %r14, %r15, 280(%r15) +; S390X-NEXT: br %r14 +; +; SZ13-LABEL: constrained_vector_tan_v2f64: +; SZ13: # %bb.0: # %entry +; SZ13-NEXT: stmg %r14, %r15, 112(%r15) +; SZ13-NEXT: .cfi_offset %r14, -48 +; SZ13-NEXT: .cfi_offset %r15, -40 +; SZ13-NEXT: aghi %r15, -176 +; 
SZ13-NEXT: .cfi_def_cfa_offset 336 +; SZ13-NEXT: larl %r1, .LCPI120_0 +; SZ13-NEXT: ld %f0, 0(%r1) +; SZ13-NEXT: brasl %r14, tan@PLT +; SZ13-NEXT: larl %r1, .LCPI120_1 +; SZ13-NEXT: # kill: def $f0d killed $f0d def $v0 +; SZ13-NEXT: vst %v0, 160(%r15), 3 # 16-byte Folded Spill +; SZ13-NEXT: ld %f0, 0(%r1) +; SZ13-NEXT: brasl %r14, tan@PLT +; SZ13-NEXT: vl %v1, 160(%r15), 3 # 16-byte Folded Reload +; SZ13-NEXT: # kill: def $f0d killed $f0d def $v0 +; SZ13-NEXT: vmrhg %v24, %v0, %v1 +; SZ13-NEXT: lmg %r14, %r15, 288(%r15) +; SZ13-NEXT: br %r14 +entry: + %tan = call <2 x double> @llvm.experimental.constrained.tan.v2f64( + <2 x double> <double 42.0, double 42.1>, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <2 x double> %tan +} + +define <3 x float> @constrained_vector_tan_v3f32() #0 { +; S390X-LABEL: constrained_vector_tan_v3f32: +; S390X: # %bb.0: # %entry +; S390X-NEXT: stmg %r14, %r15, 112(%r15) +; S390X-NEXT: .cfi_offset %r14, -48 +; S390X-NEXT: .cfi_offset %r15, -40 +; S390X-NEXT: aghi %r15, -176 +; S390X-NEXT: .cfi_def_cfa_offset 336 +; S390X-NEXT: std %f8, 168(%r15) # 8-byte Folded Spill +; S390X-NEXT: std %f9, 160(%r15) # 8-byte Folded Spill +; S390X-NEXT: .cfi_offset %f8, -168 +; S390X-NEXT: .cfi_offset %f9, -176 +; S390X-NEXT: larl %r1, .LCPI121_0 +; S390X-NEXT: le %f0, 0(%r1) +; S390X-NEXT: brasl %r14, tanf@PLT +; S390X-NEXT: larl %r1, .LCPI121_1 +; S390X-NEXT: le %f1, 0(%r1) +; S390X-NEXT: ler %f8, %f0 +; S390X-NEXT: ler %f0, %f1 +; S390X-NEXT: brasl %r14, tanf@PLT +; S390X-NEXT: larl %r1, .LCPI121_2 +; S390X-NEXT: le %f1, 0(%r1) +; S390X-NEXT: ler %f9, %f0 +; S390X-NEXT: ler %f0, %f1 +; S390X-NEXT: brasl %r14, tanf@PLT +; S390X-NEXT: ler %f2, %f9 +; S390X-NEXT: ler %f4, %f8 +; S390X-NEXT: ld %f8, 168(%r15) # 8-byte Folded Reload +; S390X-NEXT: ld %f9, 160(%r15) # 8-byte Folded Reload +; S390X-NEXT: lmg %r14, %r15, 288(%r15) +; S390X-NEXT: br %r14 +; +; SZ13-LABEL: constrained_vector_tan_v3f32: +; SZ13: # %bb.0: # %entry +; SZ13-NEXT: stmg %r14, %r15, 112(%r15) +; SZ13-NEXT: .cfi_offset %r14, -48 +; SZ13-NEXT: .cfi_offset %r15, -40 +; SZ13-NEXT: aghi %r15, -192 +; SZ13-NEXT: .cfi_def_cfa_offset 352 +; SZ13-NEXT: larl %r1, .LCPI121_0 +; SZ13-NEXT: lde %f0, 0(%r1) +; SZ13-NEXT: brasl %r14, tanf@PLT +; SZ13-NEXT: larl %r1, .LCPI121_1 +; SZ13-NEXT: # kill: def $f0s killed $f0s def $v0 +; SZ13-NEXT: vst %v0, 176(%r15), 3 # 16-byte Folded Spill +; SZ13-NEXT: lde %f0, 0(%r1) +; SZ13-NEXT: brasl %r14, tanf@PLT +; SZ13-NEXT: larl %r1, .LCPI121_2 +; SZ13-NEXT: # kill: def $f0s killed $f0s def $v0 +; SZ13-NEXT: vst %v0, 160(%r15), 3 # 16-byte Folded Spill +; SZ13-NEXT: lde %f0, 0(%r1) +; SZ13-NEXT: brasl %r14, tanf@PLT +; SZ13-NEXT: vl %v1, 160(%r15), 3 # 16-byte Folded Reload +; SZ13-NEXT: # kill: def $f0s killed $f0s def $v0 +; SZ13-NEXT: vmrhf %v0, %v1, %v0 +; SZ13-NEXT: vl %v1, 176(%r15), 3 # 16-byte Folded Reload +; SZ13-NEXT: vrepf %v1, %v1, 0 +; SZ13-NEXT: vmrhg %v24, %v0, %v1 +; SZ13-NEXT: lmg %r14, %r15, 304(%r15) +; SZ13-NEXT: br %r14 +entry: + %tan = call <3 x float> @llvm.experimental.constrained.tan.v3f32( + <3 x float> <float 42.0, float 43.0, float 44.0>, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <3 x float> %tan +} + +define void @constrained_vector_tan_v3f64(ptr %a) #0 { +; S390X-LABEL: constrained_vector_tan_v3f64: +; S390X: # %bb.0: # %entry +; S390X-NEXT: stmg %r13, %r15, 104(%r15) +; S390X-NEXT: .cfi_offset %r13, -56 +; S390X-NEXT: .cfi_offset %r14, -48 +; S390X-NEXT: .cfi_offset %r15, -40 +; S390X-NEXT: aghi %r15, -184 
+; S390X-NEXT: .cfi_def_cfa_offset 344 +; S390X-NEXT: std %f8, 176(%r15) # 8-byte Folded Spill +; S390X-NEXT: std %f9, 168(%r15) # 8-byte Folded Spill +; S390X-NEXT: std %f10, 160(%r15) # 8-byte Folded Spill +; S390X-NEXT: .cfi_offset %f8, -168 +; S390X-NEXT: .cfi_offset %f9, -176 +; S390X-NEXT: .cfi_offset %f10, -184 +; S390X-NEXT: lgr %r13, %r2 +; S390X-NEXT: ld %f8, 0(%r2) +; S390X-NEXT: ld %f0, 16(%r2) +; S390X-NEXT: ld %f9, 8(%r2) +; S390X-NEXT: brasl %r14, tan@PLT +; S390X-NEXT: ldr %f10, %f0 +; S390X-NEXT: ldr %f0, %f9 +; S390X-NEXT: brasl %r14, tan@PLT +; S390X-NEXT: ldr %f9, %f0 +; S390X-NEXT: ldr %f0, %f8 +; S390X-NEXT: brasl %r14, tan@PLT +; S390X-NEXT: std %f0, 0(%r13) +; S390X-NEXT: std %f9, 8(%r13) +; S390X-NEXT: std %f10, 16(%r13) +; S390X-NEXT: ld %f8, 176(%r15) # 8-byte Folded Reload +; S390X-NEXT: ld %f9, 168(%r15) # 8-byte Folded Reload +; S390X-NEXT: ld %f10, 160(%r15) # 8-byte Folded Reload +; S390X-NEXT: lmg %r13, %r15, 288(%r15) +; S390X-NEXT: br %r14 +; +; SZ13-LABEL: constrained_vector_tan_v3f64: +; SZ13: # %bb.0: # %entry +; SZ13-NEXT: stmg %r13, %r15, 104(%r15) +; SZ13-NEXT: .cfi_offset %r13, -56 +; SZ13-NEXT: .cfi_offset %r14, -48 +; SZ13-NEXT: .cfi_offset %r15, -40 +; SZ13-NEXT: aghi %r15, -200 +; SZ13-NEXT: .cfi_def_cfa_offset 360 +; SZ13-NEXT: std %f8, 192(%r15) # 8-byte Folded Spill +; SZ13-NEXT: .cfi_offset %f8, -168 +; SZ13-NEXT: vl %v0, 0(%r2), 4 +; SZ13-NEXT: ld %f8, 16(%r2) +; SZ13-NEXT: lgr %r13, %r2 +; SZ13-NEXT: vst %v0, 176(%r15), 3 # 16-byte Folded Spill +; SZ13-NEXT: # kill: def $f0d killed $f0d killed $v0 +; SZ13-NEXT: brasl %r14, tan@PLT +; SZ13-NEXT: # kill: def $f0d killed $f0d def $v0 +; SZ13-NEXT: vst %v0, 160(%r15), 3 # 16-byte Folded Spill +; SZ13-NEXT: vl %v0, 176(%r15), 3 # 16-byte Folded Reload +; SZ13-NEXT: vrepg %v0, %v0, 1 +; SZ13-NEXT: # kill: def $f0d killed $f0d killed $v0 +; SZ13-NEXT: brasl %r14, tan@PLT +; SZ13-NEXT: vl %v1, 160(%r15), 3 # 16-byte Folded Reload +; SZ13-NEXT: # kill: def $f0d killed $f0d def $v0 +; SZ13-NEXT: vmrhg %v0, %v1, %v0 +; SZ13-NEXT: vst %v0, 160(%r15), 3 # 16-byte Folded Spill +; SZ13-NEXT: ldr %f0, %f8 +; SZ13-NEXT: brasl %r14, tan@PLT +; SZ13-NEXT: std %f0, 16(%r13) +; SZ13-NEXT: vl %v0, 160(%r15), 3 # 16-byte Folded Reload +; SZ13-NEXT: ld %f8, 192(%r15) # 8-byte Folded Reload +; SZ13-NEXT: vst %v0, 0(%r13), 4 +; SZ13-NEXT: lmg %r13, %r15, 304(%r15) +; SZ13-NEXT: br %r14 +entry: + %b = load <3 x double>, ptr %a + %tan = call <3 x double> @llvm.experimental.constrained.tan.v3f64( + <3 x double> %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + store <3 x double> %tan, ptr %a + ret void +} + +define <4 x double> @constrained_vector_tan_v4f64() #0 { +; S390X-LABEL: constrained_vector_tan_v4f64: +; S390X: # %bb.0: # %entry +; S390X-NEXT: stmg %r14, %r15, 112(%r15) +; S390X-NEXT: .cfi_offset %r14, -48 +; S390X-NEXT: .cfi_offset %r15, -40 +; S390X-NEXT: aghi %r15, -184 +; S390X-NEXT: .cfi_def_cfa_offset 344 +; S390X-NEXT: std %f8, 176(%r15) # 8-byte Folded Spill +; S390X-NEXT: std %f9, 168(%r15) # 8-byte Folded Spill +; S390X-NEXT: std %f10, 160(%r15) # 8-byte Folded Spill +; S390X-NEXT: .cfi_offset %f8, -168 +; S390X-NEXT: .cfi_offset %f9, -176 +; S390X-NEXT: .cfi_offset %f10, -184 +; S390X-NEXT: larl %r1, .LCPI123_0 +; S390X-NEXT: ld %f0, 0(%r1) +; S390X-NEXT: brasl %r14, tan@PLT +; S390X-NEXT: larl %r1, .LCPI123_1 +; S390X-NEXT: ld %f1, 0(%r1) +; S390X-NEXT: ldr %f8, %f0 +; S390X-NEXT: ldr %f0, %f1 +; S390X-NEXT: brasl %r14, tan@PLT +; S390X-NEXT: larl %r1, .LCPI123_2 +; S390X-NEXT: 
ld %f1, 0(%r1) +; S390X-NEXT: ldr %f9, %f0 +; S390X-NEXT: ldr %f0, %f1 +; S390X-NEXT: brasl %r14, tan@PLT +; S390X-NEXT: larl %r1, .LCPI123_3 +; S390X-NEXT: ld %f1, 0(%r1) +; S390X-NEXT: ldr %f10, %f0 +; S390X-NEXT: ldr %f0, %f1 +; S390X-NEXT: brasl %r14, tan@PLT +; S390X-NEXT: ldr %f2, %f10 +; S390X-NEXT: ldr %f4, %f9 +; S390X-NEXT: ldr %f6, %f8 +; S390X-NEXT: ld %f8, 176(%r15) # 8-byte Folded Reload +; S390X-NEXT: ld %f9, 168(%r15) # 8-byte Folded Reload +; S390X-NEXT: ld %f10, 160(%r15) # 8-byte Folded Reload +; S390X-NEXT: lmg %r14, %r15, 296(%r15) +; S390X-NEXT: br %r14 +; +; SZ13-LABEL: constrained_vector_tan_v4f64: +; SZ13: # %bb.0: # %entry +; SZ13-NEXT: stmg %r14, %r15, 112(%r15) +; SZ13-NEXT: .cfi_offset %r14, -48 +; SZ13-NEXT: .cfi_offset %r15, -40 +; SZ13-NEXT: aghi %r15, -192 +; SZ13-NEXT: .cfi_def_cfa_offset 352 +; SZ13-NEXT: larl %r1, .LCPI123_0 +; SZ13-NEXT: ld %f0, 0(%r1) +; SZ13-NEXT: brasl %r14, tan@PLT +; SZ13-NEXT: larl %r1, .LCPI123_1 +; SZ13-NEXT: # kill: def $f0d killed $f0d def $v0 +; SZ13-NEXT: vst %v0, 160(%r15), 3 # 16-byte Folded Spill +; SZ13-NEXT: ld %f0, 0(%r1) +; SZ13-NEXT: brasl %r14, tan@PLT +; SZ13-NEXT: vl %v1, 160(%r15), 3 # 16-byte Folded Reload +; SZ13-NEXT: # kill: def $f0d killed $f0d def $v0 +; SZ13-NEXT: vmrhg %v0, %v0, %v1 +; SZ13-NEXT: larl %r1, .LCPI123_2 +; SZ13-NEXT: vst %v0, 160(%r15), 3 # 16-byte Folded Spill +; SZ13-NEXT: ld %f0, 0(%r1) +; SZ13-NEXT: brasl %r14, tan@PLT +; SZ13-NEXT: larl %r1, .LCPI123_3 +; SZ13-NEXT: # kill: def $f0d killed $f0d def $v0 +; SZ13-NEXT: vst %v0, 176(%r15), 3 # 16-byte Folded Spill +; SZ13-NEXT: ld %f0, 0(%r1) +; SZ13-NEXT: brasl %r14, tan@PLT +; SZ13-NEXT: vl %v1, 176(%r15), 3 # 16-byte Folded Reload +; SZ13-NEXT: vl %v24, 160(%r15), 3 # 16-byte Folded Reload +; SZ13-NEXT: # kill: def $f0d killed $f0d def $v0 +; SZ13-NEXT: vmrhg %v26, %v0, %v1 +; SZ13-NEXT: lmg %r14, %r15, 304(%r15) +; SZ13-NEXT: br %r14 +entry: + %tan = call <4 x double> @llvm.experimental.constrained.tan.v4f64( + <4 x double> <double 42.0, double 42.1, + double 42.2, double 42.3>, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <4 x double> %tan +} + attributes #0 = { strictfp } declare <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double>, <2 x double>, metadata, metadata) @@ -6234,6 +6551,7 @@ declare <2 x double> @llvm.experimental.constrained.pow.v2f64(<2 x double>, <2 x declare <2 x double> @llvm.experimental.constrained.powi.v2f64(<2 x double>, i32, metadata, metadata) declare <2 x double> @llvm.experimental.constrained.sin.v2f64(<2 x double>, metadata, metadata) declare <2 x double> @llvm.experimental.constrained.cos.v2f64(<2 x double>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.tan.v2f64(<2 x double>, metadata, metadata) declare <2 x double> @llvm.experimental.constrained.exp.v2f64(<2 x double>, metadata, metadata) declare <2 x double> @llvm.experimental.constrained.exp2.v2f64(<2 x double>, metadata, metadata) declare <2 x double> @llvm.experimental.constrained.log.v2f64(<2 x double>, metadata, metadata) @@ -6260,6 +6578,7 @@ declare <1 x float> @llvm.experimental.constrained.pow.v1f32(<1 x float>, <1 x f declare <1 x float> @llvm.experimental.constrained.powi.v1f32(<1 x float>, i32, metadata, metadata) declare <1 x float> @llvm.experimental.constrained.sin.v1f32(<1 x float>, metadata, metadata) declare <1 x float> @llvm.experimental.constrained.cos.v1f32(<1 x float>, metadata, metadata) +declare <1 x float> @llvm.experimental.constrained.tan.v1f32(<1 x float>, 
metadata, metadata) declare <1 x float> @llvm.experimental.constrained.exp.v1f32(<1 x float>, metadata, metadata) declare <1 x float> @llvm.experimental.constrained.exp2.v1f32(<1 x float>, metadata, metadata) declare <1 x float> @llvm.experimental.constrained.log.v1f32(<1 x float>, metadata, metadata) @@ -6296,6 +6615,8 @@ declare <3 x float> @llvm.experimental.constrained.sin.v3f32(<3 x float>, metada declare <3 x double> @llvm.experimental.constrained.sin.v3f64(<3 x double>, metadata, metadata) declare <3 x float> @llvm.experimental.constrained.cos.v3f32(<3 x float>, metadata, metadata) declare <3 x double> @llvm.experimental.constrained.cos.v3f64(<3 x double>, metadata, metadata) +declare <3 x float> @llvm.experimental.constrained.tan.v3f32(<3 x float>, metadata, metadata) +declare <3 x double> @llvm.experimental.constrained.tan.v3f64(<3 x double>, metadata, metadata) declare <3 x float> @llvm.experimental.constrained.exp.v3f32(<3 x float>, metadata, metadata) declare <3 x double> @llvm.experimental.constrained.exp.v3f64(<3 x double>, metadata, metadata) declare <3 x float> @llvm.experimental.constrained.exp2.v3f32(<3 x float>, metadata, metadata) @@ -6335,6 +6656,7 @@ declare <4 x double> @llvm.experimental.constrained.pow.v4f64(<4 x double>, <4 x declare <4 x double> @llvm.experimental.constrained.powi.v4f64(<4 x double>, i32, metadata, metadata) declare <4 x double> @llvm.experimental.constrained.sin.v4f64(<4 x double>, metadata, metadata) declare <4 x double> @llvm.experimental.constrained.cos.v4f64(<4 x double>, metadata, metadata) +declare <4 x double> @llvm.experimental.constrained.tan.v4f64(<4 x double>, metadata, metadata) declare <4 x double> @llvm.experimental.constrained.exp.v4f64(<4 x double>, metadata, metadata) declare <4 x double> @llvm.experimental.constrained.exp2.v4f64(<4 x double>, metadata, metadata) declare <4 x double> @llvm.experimental.constrained.log.v4f64(<4 x double>, metadata, metadata) diff --git a/llvm/test/CodeGen/X86/fp-intrinsics.ll b/llvm/test/CodeGen/X86/fp-intrinsics.ll index d2b45ee1e03e6..8c48e6f9da80a 100644 --- a/llvm/test/CodeGen/X86/fp-intrinsics.ll +++ b/llvm/test/CodeGen/X86/fp-intrinsics.ll @@ -2758,6 +2758,58 @@ entry: ret float %result } +; Verify that tan(42.0) isn't simplified when the rounding mode is unknown. 
+define double @ftan() #0 { +; X87-LABEL: ftan: +; X87: # %bb.0: # %entry +; X87-NEXT: subl $12, %esp +; X87-NEXT: .cfi_def_cfa_offset 16 +; X87-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} +; X87-NEXT: fstpl (%esp) +; X87-NEXT: wait +; X87-NEXT: calll tan +; X87-NEXT: addl $12, %esp +; X87-NEXT: .cfi_def_cfa_offset 4 +; X87-NEXT: retl +; +; X86-SSE-LABEL: ftan: +; X86-SSE: # %bb.0: # %entry +; X86-SSE-NEXT: subl $12, %esp +; X86-SSE-NEXT: .cfi_def_cfa_offset 16 +; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] +; X86-SSE-NEXT: movsd %xmm0, (%esp) +; X86-SSE-NEXT: calll tan +; X86-SSE-NEXT: addl $12, %esp +; X86-SSE-NEXT: .cfi_def_cfa_offset 4 +; X86-SSE-NEXT: retl +; +; SSE-LABEL: ftan: +; SSE: # %bb.0: # %entry +; SSE-NEXT: pushq %rax +; SSE-NEXT: .cfi_def_cfa_offset 16 +; SSE-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] +; SSE-NEXT: callq tan@PLT +; SSE-NEXT: popq %rax +; SSE-NEXT: .cfi_def_cfa_offset 8 +; SSE-NEXT: retq +; +; AVX-LABEL: ftan: +; AVX: # %bb.0: # %entry +; AVX-NEXT: pushq %rax +; AVX-NEXT: .cfi_def_cfa_offset 16 +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] +; AVX-NEXT: callq tan@PLT +; AVX-NEXT: popq %rax +; AVX-NEXT: .cfi_def_cfa_offset 8 +; AVX-NEXT: retq +entry: + %result = call double @llvm.experimental.constrained.tan.f64(double 42.0, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret double %result +} + + attributes #0 = { strictfp } @llvm.fp.env = thread_local global i8 zeroinitializer, section "llvm.metadata" @@ -2771,6 +2823,7 @@ declare double @llvm.experimental.constrained.pow.f64(double, double, metadata, declare double @llvm.experimental.constrained.powi.f64(double, i32, metadata, metadata) declare double @llvm.experimental.constrained.sin.f64(double, metadata, metadata) declare double @llvm.experimental.constrained.cos.f64(double, metadata, metadata) +declare double @llvm.experimental.constrained.tan.f64(double, metadata, metadata) declare double @llvm.experimental.constrained.exp.f64(double, metadata, metadata) declare double @llvm.experimental.constrained.exp2.f64(double, metadata, metadata) declare double @llvm.experimental.constrained.log.f64(double, metadata, metadata) diff --git a/llvm/test/CodeGen/X86/fp-strict-libcalls-msvc32.ll b/llvm/test/CodeGen/X86/fp-strict-libcalls-msvc32.ll index 1bc308bef8ccc..cfec52c0e6886 100644 --- a/llvm/test/CodeGen/X86/fp-strict-libcalls-msvc32.ll +++ b/llvm/test/CodeGen/X86/fp-strict-libcalls-msvc32.ll @@ -160,6 +160,23 @@ define float @sin(float %x) #0 { ret float %result } +define float @tan(float %x) #0 { +; CHECK-LABEL: tan: +; CHECK: # %bb.0: +; CHECK-NEXT: subl $12, %esp +; CHECK-NEXT: flds {{[0-9]+}}(%esp) +; CHECK-NEXT: fstpl (%esp) +; CHECK-NEXT: wait +; CHECK-NEXT: calll _tan +; CHECK-NEXT: fstps {{[0-9]+}}(%esp) +; CHECK-NEXT: flds {{[0-9]+}}(%esp) +; CHECK-NEXT: wait +; CHECK-NEXT: addl $12, %esp +; CHECK-NEXT: retl + %result = call float @llvm.experimental.constrained.tan.f32(float %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 + ret float %result +} + attributes #0 = { strictfp } declare float @llvm.experimental.constrained.ceil.f32(float, metadata) @@ -171,3 +188,4 @@ declare float @llvm.experimental.constrained.log.f32(float, metadata, metadata) declare float @llvm.experimental.constrained.log10.f32(float, metadata, metadata) declare float @llvm.experimental.constrained.pow.f32(float, float, metadata, metadata) declare float @llvm.experimental.constrained.sin.f32(float, metadata, metadata) +declare float @llvm.experimental.constrained.tan.f32(float, metadata, 
metadata) diff --git a/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll b/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll index f1d473f81a9fa..bd51f553587db 100644 --- a/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll +++ b/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll @@ -1047,6 +1047,46 @@ entry: ret fp128 %sqrt } +define fp128 @tan(fp128 %x) nounwind strictfp { +; ANDROID-LABEL: tan: +; ANDROID: # %bb.0: # %entry +; ANDROID-NEXT: pushq %rax +; ANDROID-NEXT: callq tanl@PLT +; ANDROID-NEXT: popq %rax +; ANDROID-NEXT: retq +; +; GNU-LABEL: tan: +; GNU: # %bb.0: # %entry +; GNU-NEXT: pushq %rax +; GNU-NEXT: callq tanf128@PLT +; GNU-NEXT: popq %rax +; GNU-NEXT: retq +; +; X86-LABEL: tan: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %esi +; X86-NEXT: subl $24, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: subl $12, %esp +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl %eax +; X86-NEXT: calll tanl +; X86-NEXT: addl $28, %esp +; X86-NEXT: movaps (%esp), %xmm0 +; X86-NEXT: movaps %xmm0, (%esi) +; X86-NEXT: movl %esi, %eax +; X86-NEXT: addl $24, %esp +; X86-NEXT: popl %esi +; X86-NEXT: retl $4 +entry: + %tan = call fp128 @llvm.experimental.constrained.tan.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 + ret fp128 %tan +} + define fp128 @trunc(fp128 %x) nounwind strictfp { ; ANDROID-LABEL: trunc: ; ANDROID: # %bb.0: # %entry @@ -1663,6 +1703,7 @@ declare fp128 @llvm.experimental.constrained.round.f128(fp128, metadata) declare fp128 @llvm.experimental.constrained.roundeven.f128(fp128, metadata) declare fp128 @llvm.experimental.constrained.sin.f128(fp128, metadata, metadata) declare fp128 @llvm.experimental.constrained.sqrt.f128(fp128, metadata, metadata) +declare fp128 @llvm.experimental.constrained.tan.f128(fp128, metadata, metadata) declare fp128 @llvm.experimental.constrained.trunc.f128(fp128, metadata) declare i32 @llvm.experimental.constrained.lrint.i32.f128(fp128, metadata, metadata) declare i64 @llvm.experimental.constrained.llrint.i64.f128(fp128, metadata, metadata) diff --git a/llvm/test/CodeGen/X86/fp80-strict-libcalls.ll b/llvm/test/CodeGen/X86/fp80-strict-libcalls.ll index 4d50b15e5c185..89729975cfd61 100644 --- a/llvm/test/CodeGen/X86/fp80-strict-libcalls.ll +++ b/llvm/test/CodeGen/X86/fp80-strict-libcalls.ll @@ -504,6 +504,31 @@ entry: ret x86_fp80 %sin } +define x86_fp80 @tan(x86_fp80 %x) nounwind strictfp { +; X86-LABEL: tan: +; X86: # %bb.0: # %entry +; X86-NEXT: subl $12, %esp +; X86-NEXT: fldt {{[0-9]+}}(%esp) +; X86-NEXT: fstpt (%esp) +; X86-NEXT: wait +; X86-NEXT: calll tanl +; X86-NEXT: addl $12, %esp +; X86-NEXT: retl +; +; X64-LABEL: tan: +; X64: # %bb.0: # %entry +; X64-NEXT: subq $24, %rsp +; X64-NEXT: fldt {{[0-9]+}}(%rsp) +; X64-NEXT: fstpt (%rsp) +; X64-NEXT: wait +; X64-NEXT: callq tanl@PLT +; X64-NEXT: addq $24, %rsp +; X64-NEXT: retq +entry: + %tan = call x86_fp80 @llvm.experimental.constrained.tan.f80(x86_fp80 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 + ret x86_fp80 %tan +} + define x86_fp80 @trunc(x86_fp80 %x) nounwind strictfp { ; X86-LABEL: trunc: ; X86: # %bb.0: # %entry @@ -650,6 +675,7 @@ declare x86_fp80 @llvm.experimental.constrained.rint.f80(x86_fp80, metadata, met declare x86_fp80 @llvm.experimental.constrained.round.f80(x86_fp80, metadata) declare x86_fp80 @llvm.experimental.constrained.roundeven.f80(x86_fp80, metadata) declare x86_fp80 
@llvm.experimental.constrained.sin.f80(x86_fp80, metadata, metadata) +declare x86_fp80 @llvm.experimental.constrained.tan.f80(x86_fp80, metadata, metadata) declare x86_fp80 @llvm.experimental.constrained.trunc.f80(x86_fp80, metadata) declare i32 @llvm.experimental.constrained.lrint.i32.f80(x86_fp80, metadata, metadata) declare i64 @llvm.experimental.constrained.llrint.i64.f80(x86_fp80, metadata, metadata) diff --git a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll index 0adb9ddfc426a..d71fd470651cf 100644 --- a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll +++ b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll @@ -7771,6 +7771,234 @@ define <16 x float> @vpaddd_mask_test(<16 x float> %i, <16 x float> %j, <16 x i3 %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %i ret <16 x float> %r } + +define <1 x float> @constrained_vector_tan_v1f32() #0 { +; CHECK-LABEL: constrained_vector_tan_v1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] +; CHECK-NEXT: callq tanf@PLT +; CHECK-NEXT: popq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq +; +; AVX-LABEL: constrained_vector_tan_v1f32: +; AVX: # %bb.0: # %entry +; AVX-NEXT: pushq %rax +; AVX-NEXT: .cfi_def_cfa_offset 16 +; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] +; AVX-NEXT: callq tanf@PLT +; AVX-NEXT: popq %rax +; AVX-NEXT: .cfi_def_cfa_offset 8 +; AVX-NEXT: retq +entry: + %tan = call <1 x float> @llvm.experimental.constrained.tan.v1f32( + <1 x float> <float 42.0>, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <1 x float> %tan +} + +define <2 x double> @constrained_vector_tan_v2f64() #0 { +; CHECK-LABEL: constrained_vector_tan_v2f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: subq $24, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] +; CHECK-NEXT: callq tan@PLT +; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] +; CHECK-NEXT: callq tan@PLT +; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload +; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] +; CHECK-NEXT: addq $24, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq +; +; AVX-LABEL: constrained_vector_tan_v2f64: +; AVX: # %bb.0: # %entry +; AVX-NEXT: subq $24, %rsp +; AVX-NEXT: .cfi_def_cfa_offset 32 +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] +; AVX-NEXT: callq tan@PLT +; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] +; AVX-NEXT: callq tan@PLT +; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX-NEXT: # xmm0 = xmm0[0],mem[0] +; AVX-NEXT: addq $24, %rsp +; AVX-NEXT: .cfi_def_cfa_offset 8 +; AVX-NEXT: retq +entry: + %tan = call <2 x double> @llvm.experimental.constrained.tan.v2f64( + <2 x double> <double 42.0, double 42.1>, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <2 x double> %tan +} + +define <3 x float> @constrained_vector_tan_v3f32() #0 { +; CHECK-LABEL: constrained_vector_tan_v3f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: subq $40, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0] +; CHECK-NEXT: callq tanf@PLT +; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movss {{.*#+}} xmm0 = 
[4.2E+1,0.0E+0,0.0E+0,0.0E+0] +; CHECK-NEXT: callq tanf@PLT +; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0] +; CHECK-NEXT: callq tanf@PLT +; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload +; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload +; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] +; CHECK-NEXT: movaps %xmm1, %xmm0 +; CHECK-NEXT: addq $40, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq +; +; AVX-LABEL: constrained_vector_tan_v3f32: +; AVX: # %bb.0: # %entry +; AVX-NEXT: subq $40, %rsp +; AVX-NEXT: .cfi_def_cfa_offset 48 +; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0] +; AVX-NEXT: callq tanf@PLT +; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] +; AVX-NEXT: callq tanf@PLT +; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0] +; AVX-NEXT: callq tanf@PLT +; AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload +; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] +; AVX-NEXT: vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3] +; AVX-NEXT: addq $40, %rsp +; AVX-NEXT: .cfi_def_cfa_offset 8 +; AVX-NEXT: retq +entry: + %tan = call <3 x float> @llvm.experimental.constrained.tan.v3f32( + <3 x float> <float 42.0, float 43.0, float 44.0>, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <3 x float> %tan +} + +define <3 x double> @constrained_vector_tan_v3f64() #0 { +; CHECK-LABEL: constrained_vector_tan_v3f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: subq $24, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] +; CHECK-NEXT: callq tan@PLT +; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] +; CHECK-NEXT: callq tan@PLT +; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill +; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] +; CHECK-NEXT: callq tan@PLT +; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) +; CHECK-NEXT: fldl {{[0-9]+}}(%rsp) +; CHECK-NEXT: wait +; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload +; CHECK-NEXT: # xmm0 = mem[0],zero +; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload +; CHECK-NEXT: # xmm1 = mem[0],zero +; CHECK-NEXT: addq $24, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq +; +; AVX-LABEL: constrained_vector_tan_v3f64: +; AVX: # %bb.0: # %entry +; AVX-NEXT: subq $40, %rsp +; AVX-NEXT: .cfi_def_cfa_offset 48 +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] +; AVX-NEXT: callq tan@PLT +; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] +; AVX-NEXT: callq tan@PLT +; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX-NEXT: # xmm0 = xmm0[0],mem[0] +; AVX-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] +; AVX-NEXT: vzeroupper +; AVX-NEXT: callq tan@PLT +; AVX-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload +; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX-NEXT: addq $40, %rsp +; AVX-NEXT: .cfi_def_cfa_offset 8 +; AVX-NEXT: retq +entry: + %tan = call <3 x double> @llvm.experimental.constrained.tan.v3f64( + <3 x double> 
<double 42.0, double 42.1, double 42.2>, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <3 x double> %tan +} + +define <4 x double> @constrained_vector_tan_v4f64() #0 { +; CHECK-LABEL: constrained_vector_tan_v4f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: subq $40, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] +; CHECK-NEXT: callq tan@PLT +; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] +; CHECK-NEXT: callq tan@PLT +; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload +; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] +; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0] +; CHECK-NEXT: callq tan@PLT +; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] +; CHECK-NEXT: callq tan@PLT +; CHECK-NEXT: movaps %xmm0, %xmm1 +; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload +; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] +; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload +; CHECK-NEXT: addq $40, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq +; +; AVX-LABEL: constrained_vector_tan_v4f64: +; AVX: # %bb.0: # %entry +; AVX-NEXT: subq $40, %rsp +; AVX-NEXT: .cfi_def_cfa_offset 48 +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0] +; AVX-NEXT: callq tan@PLT +; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] +; AVX-NEXT: callq tan@PLT +; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX-NEXT: # xmm0 = xmm0[0],mem[0] +; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] +; AVX-NEXT: callq tan@PLT +; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] +; AVX-NEXT: callq tan@PLT +; AVX-NEXT: vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX-NEXT: # xmm0 = xmm0[0],mem[0] +; AVX-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload +; AVX-NEXT: addq $40, %rsp +; AVX-NEXT: .cfi_def_cfa_offset 8 +; AVX-NEXT: retq +entry: + %tan = call <4 x double> @llvm.experimental.constrained.tan.v4f64( + <4 x double> <double 42.0, double 42.1, + double 42.2, double 42.3>, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <4 x double> %tan +} + + + declare <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float>, <16 x float>, metadata, metadata) attributes #0 = { strictfp } @@ -7786,6 +8014,7 @@ declare <2 x double> @llvm.experimental.constrained.pow.v2f64(<2 x double>, <2 x declare <2 x double> @llvm.experimental.constrained.powi.v2f64(<2 x double>, i32, metadata, metadata) declare <2 x double> @llvm.experimental.constrained.sin.v2f64(<2 x double>, metadata, metadata) declare <2 x double> @llvm.experimental.constrained.cos.v2f64(<2 x double>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.tan.v2f64(<2 x double>, metadata, metadata) declare <2 x double> @llvm.experimental.constrained.exp.v2f64(<2 x double>, metadata, metadata) declare <2 x double> @llvm.experimental.constrained.exp2.v2f64(<2 x double>, metadata, metadata) declare <2 x double> @llvm.experimental.constrained.log.v2f64(<2 x double>, metadata, metadata) @@ -7829,6 +8058,7 @@ declare <1 x float> 
@llvm.experimental.constrained.pow.v1f32(<1 x float>, <1 x f declare <1 x float> @llvm.experimental.constrained.powi.v1f32(<1 x float>, i32, metadata, metadata) declare <1 x float> @llvm.experimental.constrained.sin.v1f32(<1 x float>, metadata, metadata) declare <1 x float> @llvm.experimental.constrained.cos.v1f32(<1 x float>, metadata, metadata) +declare <1 x float> @llvm.experimental.constrained.tan.v1f32(<1 x float>, metadata, metadata) declare <1 x float> @llvm.experimental.constrained.exp.v1f32(<1 x float>, metadata, metadata) declare <1 x float> @llvm.experimental.constrained.exp2.v1f32(<1 x float>, metadata, metadata) declare <1 x float> @llvm.experimental.constrained.log.v1f32(<1 x float>, metadata, metadata) @@ -7882,6 +8112,8 @@ declare <3 x float> @llvm.experimental.constrained.sin.v3f32(<3 x float>, metada declare <3 x double> @llvm.experimental.constrained.sin.v3f64(<3 x double>, metadata, metadata) declare <3 x float> @llvm.experimental.constrained.cos.v3f32(<3 x float>, metadata, metadata) declare <3 x double> @llvm.experimental.constrained.cos.v3f64(<3 x double>, metadata, metadata) +declare <3 x float> @llvm.experimental.constrained.tan.v3f32(<3 x float>, metadata, metadata) +declare <3 x double> @llvm.experimental.constrained.tan.v3f64(<3 x double>, metadata, metadata) declare <3 x float> @llvm.experimental.constrained.exp.v3f32(<3 x float>, metadata, metadata) declare <3 x double> @llvm.experimental.constrained.exp.v3f64(<3 x double>, metadata, metadata) declare <3 x float> @llvm.experimental.constrained.exp2.v3f32(<3 x float>, metadata, metadata) @@ -7938,6 +8170,7 @@ declare <4 x double> @llvm.experimental.constrained.pow.v4f64(<4 x double>, <4 x declare <4 x double> @llvm.experimental.constrained.powi.v4f64(<4 x double>, i32, metadata, metadata) declare <4 x double> @llvm.experimental.constrained.sin.v4f64(<4 x double>, metadata, metadata) declare <4 x double> @llvm.experimental.constrained.cos.v4f64(<4 x double>, metadata, metadata) +declare <4 x double> @llvm.experimental.constrained.tan.v4f64(<4 x double>, metadata, metadata) declare <4 x double> @llvm.experimental.constrained.exp.v4f64(<4 x double>, metadata, metadata) declare <4 x double> @llvm.experimental.constrained.exp2.v4f64(<4 x double>, metadata, metadata) declare <4 x double> @llvm.experimental.constrained.log.v4f64(<4 x double>, metadata, metadata) diff --git a/llvm/test/Feature/fp-intrinsics.ll b/llvm/test/Feature/fp-intrinsics.ll index b92408a1bf1cd..78275a16d3e8f 100644 --- a/llvm/test/Feature/fp-intrinsics.ll +++ b/llvm/test/Feature/fp-intrinsics.ll @@ -151,6 +151,17 @@ entry: ret double %result } +; Verify that tan(42.0) isn't simplified when the rounding mode is unknown. +; CHECK-LABEL: ftan +; CHECK: call double @llvm.experimental.constrained.tan +define double @ftan() #0 { +entry: + %result = call double @llvm.experimental.constrained.tan.f64(double 42.0, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret double %result +} + ; Verify that exp(42.0) isn't simplified when the rounding mode is unknown. 
; CHECK-LABEL: f10 ; CHECK: call double @llvm.experimental.constrained.exp @@ -407,6 +418,7 @@ declare double @llvm.experimental.constrained.pow.f64(double, double, metadata, declare double @llvm.experimental.constrained.powi.f64(double, i32, metadata, metadata) declare double @llvm.experimental.constrained.sin.f64(double, metadata, metadata) declare double @llvm.experimental.constrained.cos.f64(double, metadata, metadata) +declare double @llvm.experimental.constrained.tan.f64(double, metadata, metadata) declare double @llvm.experimental.constrained.exp.f64(double, metadata, metadata) declare double @llvm.experimental.constrained.exp2.f64(double, metadata, metadata) declare double @llvm.experimental.constrained.log.f64(double, metadata, metadata)
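For context on what the new CHECK lines above verify, here is a minimal standalone LLVM IR sketch, not part of the patch itself; the function name @tan_strict is illustrative only. Because the rounding mode is dynamic and FP exceptions are strict, the optimizer may not constant-fold the call, and targets without a native FTAN lowering expand it to a tan libcall, which is exactly the tan@PLT / calll tan sequences checked in the tests above.

define double @tan_strict(double %x) strictfp {
entry:
  ; May not be folded: the rounding mode is unknown and FP exceptions are observable.
  %r = call double @llvm.experimental.constrained.tan.f64(double %x,
                                                          metadata !"round.dynamic",
                                                          metadata !"fpexcept.strict") strictfp
  ret double %r
}

declare double @llvm.experimental.constrained.tan.f64(double, metadata, metadata)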