https://github.com/tex3d updated https://github.com/llvm/llvm-project/pull/113636
>From a6776121bb118fe4083ccb94fa582cca1aef7f9b Mon Sep 17 00:00:00 2001 From: Tex Riddell <t...@microsoft.com> Date: Tue, 15 Oct 2024 16:18:44 -0700 Subject: [PATCH 1/6] Emit constrained atan2 intrinsic for clang builtin This change is part of this proposal: https://discourse.llvm.org/t/rfc-all-the-math-intrinsics/78294 - `Builtins.td` - Add f16 support for libm atan2 builtin - `CGBuiltin.cpp` - Emit constrained atan2 intrinsic for clang builtin Part of Implement the atan2 HLSL Function #70096. --- clang/include/clang/Basic/Builtins.td | 6 +++--- clang/lib/CodeGen/CGBuiltin.cpp | 11 ++++++++++ clang/test/CodeGen/X86/math-builtins.c | 14 ++++++------- .../test/CodeGen/constrained-math-builtins.c | 7 +++++++ clang/test/CodeGen/libcalls.c | 7 +++---- clang/test/CodeGen/math-libcalls.c | 20 +++++++++---------- .../test/CodeGenCXX/builtin-calling-conv.cpp | 10 +++++----- clang/test/CodeGenOpenCL/builtins-f16.cl | 3 +++ 8 files changed, 49 insertions(+), 29 deletions(-) diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td index 87a798183d6e19..305b085f69420a 100644 --- a/clang/include/clang/Basic/Builtins.td +++ b/clang/include/clang/Basic/Builtins.td @@ -227,10 +227,10 @@ def FminimumNumF16F128 : Builtin, F16F128MathTemplate { let Prototype = "T(T, T)"; } -def Atan2F128 : Builtin { - let Spellings = ["__builtin_atan2f128"]; +def Atan2F16F128 : Builtin, F16F128MathTemplate { + let Spellings = ["__builtin_atan2"]; let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions]; - let Prototype = "__float128(__float128, __float128)"; + let Prototype = "T(T, T)"; } def CopysignF16 : Builtin { diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 5c3df5124517d6..9b63fcbedc8c45 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -2798,6 +2798,17 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, return 
RValue::get(emitUnaryMaybeConstrainedFPBuiltin( *this, E, Intrinsic::atan, Intrinsic::experimental_constrained_atan)); + case Builtin::BIatan2: + case Builtin::BIatan2f: + case Builtin::BIatan2l: + case Builtin::BI__builtin_atan2: + case Builtin::BI__builtin_atan2f: + case Builtin::BI__builtin_atan2f16: + case Builtin::BI__builtin_atan2l: + case Builtin::BI__builtin_atan2f128: + return RValue::get(emitBinaryMaybeConstrainedFPBuiltin( + *this, E, Intrinsic::atan2, Intrinsic::experimental_constrained_atan2)); + case Builtin::BIceil: case Builtin::BIceilf: case Builtin::BIceill: diff --git a/clang/test/CodeGen/X86/math-builtins.c b/clang/test/CodeGen/X86/math-builtins.c index 48465df21cca19..bf107437fc63a3 100644 --- a/clang/test/CodeGen/X86/math-builtins.c +++ b/clang/test/CodeGen/X86/math-builtins.c @@ -45,10 +45,10 @@ void foo(double *d, float f, float *fp, long double *l, int *i, const char *c) { __builtin_atan2(f,f); __builtin_atan2f(f,f) ; __builtin_atan2l(f, f); __builtin_atan2f128(f,f); -// NO__ERRNO: declare double @atan2(double noundef, double noundef) [[READNONE:#[0-9]+]] -// NO__ERRNO: declare float @atan2f(float noundef, float noundef) [[READNONE]] -// NO__ERRNO: declare x86_fp80 @atan2l(x86_fp80 noundef, x86_fp80 noundef) [[READNONE]] -// NO__ERRNO: declare fp128 @atan2f128(fp128 noundef, fp128 noundef) [[READNONE]] +// NO__ERRNO: declare double @llvm.atan2.f64(double, double) [[READNONE_INTRINSIC:#[0-9]+]] +// NO__ERRNO: declare float @llvm.atan2.f32(float, float) [[READNONE_INTRINSIC]] +// NO__ERRNO: declare x86_fp80 @llvm.atan2.f80(x86_fp80, x86_fp80) [[READNONE_INTRINSIC]] +// NO__ERRNO: declare fp128 @llvm.atan2.f128(fp128, fp128) [[READNONE_INTRINSIC]] // HAS_ERRNO: declare double @atan2(double noundef, double noundef) [[NOT_READNONE]] // HAS_ERRNO: declare float @atan2f(float noundef, float noundef) [[NOT_READNONE]] // HAS_ERRNO: declare x86_fp80 @atan2l(x86_fp80 noundef, x86_fp80 noundef) [[NOT_READNONE]] @@ -56,7 +56,7 @@ void foo(double *d, 
float f, float *fp, long double *l, int *i, const char *c) { __builtin_copysign(f,f); __builtin_copysignf(f,f); __builtin_copysignl(f,f); __builtin_copysignf128(f,f); -// NO__ERRNO: declare double @llvm.copysign.f64(double, double) [[READNONE_INTRINSIC:#[0-9]+]] +// NO__ERRNO: declare double @llvm.copysign.f64(double, double) [[READNONE_INTRINSIC]] // NO__ERRNO: declare float @llvm.copysign.f32(float, float) [[READNONE_INTRINSIC]] // NO__ERRNO: declare x86_fp80 @llvm.copysign.f80(x86_fp80, x86_fp80) [[READNONE_INTRINSIC]] // NO__ERRNO: declare fp128 @llvm.copysign.f128(fp128, fp128) [[READNONE_INTRINSIC]] @@ -179,7 +179,7 @@ void foo(double *d, float f, float *fp, long double *l, int *i, const char *c) { __builtin_acosh(f); __builtin_acoshf(f); __builtin_acoshl(f); __builtin_acoshf128(f); -// NO__ERRNO: declare double @acosh(double noundef) [[READNONE]] +// NO__ERRNO: declare double @acosh(double noundef) [[READNONE:#[0-9]+]] // NO__ERRNO: declare float @acoshf(float noundef) [[READNONE]] // NO__ERRNO: declare x86_fp80 @acoshl(x86_fp80 noundef) [[READNONE]] // NO__ERRNO: declare fp128 @acoshf128(fp128 noundef) [[READNONE]] @@ -721,10 +721,10 @@ __builtin_trunc(f); __builtin_truncf(f); __builtin_truncl(f); __builtin // HAS_ERRNO: declare fp128 @llvm.trunc.f128(fp128) [[READNONE_INTRINSIC]] }; -// NO__ERRNO: attributes [[READNONE]] = { {{.*}}memory(none){{.*}} } // NO__ERRNO: attributes [[READNONE_INTRINSIC]] = { {{.*}}memory(none){{.*}} } // NO__ERRNO: attributes [[NOT_READNONE]] = { nounwind {{.*}} } // NO__ERRNO: attributes [[PURE]] = { {{.*}}memory(read){{.*}} } +// NO__ERRNO: attributes [[READNONE]] = { {{.*}}memory(none){{.*}} } // HAS_ERRNO: attributes [[NOT_READNONE]] = { nounwind {{.*}} } // HAS_ERRNO: attributes [[READNONE_INTRINSIC]] = { {{.*}}memory(none){{.*}} } diff --git a/clang/test/CodeGen/constrained-math-builtins.c b/clang/test/CodeGen/constrained-math-builtins.c index aa77620b445356..68b9e75283c547 100644 --- 
a/clang/test/CodeGen/constrained-math-builtins.c +++ b/clang/test/CodeGen/constrained-math-builtins.c @@ -57,6 +57,13 @@ __builtin_atan(f); __builtin_atanf(f); __builtin_atanl(f); __builti // CHECK: call x86_fp80 @llvm.experimental.constrained.atan.f80(x86_fp80 %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict") // CHECK: call fp128 @llvm.experimental.constrained.atan.f128(fp128 %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict") +__builtin_atan2(f,f); __builtin_atan2f(f,f); __builtin_atan2l(f,f); __builtin_atan2f128(f,f); + +// CHECK: call double @llvm.experimental.constrained.atan2.f64(double %{{.*}}, double %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict") +// CHECK: call float @llvm.experimental.constrained.atan2.f32(float %{{.*}}, float %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict") +// CHECK: call x86_fp80 @llvm.experimental.constrained.atan2.f80(x86_fp80 %{{.*}}, x86_fp80 %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict") +// CHECK: call fp128 @llvm.experimental.constrained.atan2.f128(fp128 %{{.*}}, fp128 %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict") + __builtin_ceil(f); __builtin_ceilf(f); __builtin_ceill(f); __builtin_ceilf128(f); // CHECK: call double @llvm.experimental.constrained.ceil.f64(double %{{.*}}, metadata !"fpexcept.strict") diff --git a/clang/test/CodeGen/libcalls.c b/clang/test/CodeGen/libcalls.c index b1637121127c5b..1e4b06e34aaf92 100644 --- a/clang/test/CodeGen/libcalls.c +++ b/clang/test/CodeGen/libcalls.c @@ -95,9 +95,9 @@ void test_builtins(double d, float f, long double ld) { double atan2_ = atan2(d, 2); long double atan2l_ = atan2l(ld, ld); float atan2f_ = atan2f(f, f); -// CHECK-NO: declare double @atan2(double noundef, double noundef) [[NUW_RN:#[0-9]+]] -// CHECK-NO: declare x86_fp80 @atan2l(x86_fp80 noundef, x86_fp80 noundef) [[NUW_RN]] -// CHECK-NO: declare float @atan2f(float noundef, float noundef) [[NUW_RN]] +// CHECK-NO: 
declare double @llvm.atan2.f64(double, double) [[NUW_RNI]] +// CHECK-NO: declare x86_fp80 @llvm.atan2.f80(x86_fp80, x86_fp80) [[NUW_RNI]] +// CHECK-NO: declare float @llvm.atan2.f32(float, float) [[NUW_RNI]] // CHECK-YES: declare double @atan2(double noundef, double noundef) [[NUW]] // CHECK-YES: declare x86_fp80 @atan2l(x86_fp80 noundef, x86_fp80 noundef) [[NUW]] // CHECK-YES: declare float @atan2f(float noundef, float noundef) [[NUW]] @@ -124,5 +124,4 @@ void test_builtins(double d, float f, long double ld) { } // CHECK-YES: attributes [[NUW]] = { nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+x87" } -// CHECK-NO-DAG: attributes [[NUW_RN]] = { nounwind willreturn memory(none){{.*}} } // CHECK-NO-DAG: attributes [[NUW_RNI]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } diff --git a/clang/test/CodeGen/math-libcalls.c b/clang/test/CodeGen/math-libcalls.c index 2226212eca94ee..bcc61c8f046b43 100644 --- a/clang/test/CodeGen/math-libcalls.c +++ b/clang/test/CodeGen/math-libcalls.c @@ -23,19 +23,19 @@ void foo(double *d, float f, float *fp, long double *l, int *i, const char *c) { atan2(f,f); atan2f(f,f) ; atan2l(f, f); - // NO__ERRNO: declare double @atan2(double noundef, double noundef) [[READNONE:#[0-9]+]] - // NO__ERRNO: declare float @atan2f(float noundef, float noundef) [[READNONE]] - // NO__ERRNO: declare x86_fp80 @atan2l(x86_fp80 noundef, x86_fp80 noundef) [[READNONE]] + // NO__ERRNO: declare double @llvm.atan2.f64(double, double) [[READNONE_INTRINSIC:#[0-9]+]] + // NO__ERRNO: declare float @llvm.atan2.f32(float, float) [[READNONE_INTRINSIC]] + // NO__ERRNO: declare x86_fp80 @llvm.atan2.f80(x86_fp80, x86_fp80) [[READNONE_INTRINSIC]] // HAS_ERRNO: declare double @atan2(double noundef, double noundef) [[NOT_READNONE]] // HAS_ERRNO: declare float @atan2f(float noundef, float noundef) [[NOT_READNONE]] // HAS_ERRNO: declare x86_fp80 @atan2l(x86_fp80 noundef, x86_fp80 noundef) 
[[NOT_READNONE]] - // HAS_MAYTRAP: declare double @atan2(double noundef, double noundef) [[NOT_READNONE:#[0-9]+]] - // HAS_MAYTRAP: declare float @atan2f(float noundef, float noundef) [[NOT_READNONE]] - // HAS_MAYTRAP: declare x86_fp80 @atan2l(x86_fp80 noundef, x86_fp80 noundef) [[NOT_READNONE]] + // HAS_MAYTRAP: declare double @llvm.experimental.constrained.atan2.f64( + // HAS_MAYTRAP: declare float @llvm.experimental.constrained.atan2.f32( + // HAS_MAYTRAP: declare x86_fp80 @llvm.experimental.constrained.atan2.f80( copysign(f,f); copysignf(f,f);copysignl(f,f); - // NO__ERRNO: declare double @llvm.copysign.f64(double, double) [[READNONE_INTRINSIC:#[0-9]+]] + // NO__ERRNO: declare double @llvm.copysign.f64(double, double) [[READNONE_INTRINSIC]] // NO__ERRNO: declare float @llvm.copysign.f32(float, float) [[READNONE_INTRINSIC]] // NO__ERRNO: declare x86_fp80 @llvm.copysign.f80(x86_fp80, x86_fp80) [[READNONE_INTRINSIC]] // HAS_ERRNO: declare double @llvm.copysign.f64(double, double) [[READNONE_INTRINSIC:#[0-9]+]] @@ -65,13 +65,13 @@ void foo(double *d, float f, float *fp, long double *l, int *i, const char *c) { // HAS_ERRNO: declare double @frexp(double noundef, ptr noundef) [[NOT_READNONE]] // HAS_ERRNO: declare float @frexpf(float noundef, ptr noundef) [[NOT_READNONE]] // HAS_ERRNO: declare x86_fp80 @frexpl(x86_fp80 noundef, ptr noundef) [[NOT_READNONE]] - // HAS_MAYTRAP: declare double @frexp(double noundef, ptr noundef) [[NOT_READNONE]] + // HAS_MAYTRAP: declare double @frexp(double noundef, ptr noundef) [[NOT_READNONE:#[0-9]+]] // HAS_MAYTRAP: declare float @frexpf(float noundef, ptr noundef) [[NOT_READNONE]] // HAS_MAYTRAP: declare x86_fp80 @frexpl(x86_fp80 noundef, ptr noundef) [[NOT_READNONE]] ldexp(f,f); ldexpf(f,f); ldexpl(f,f); - // NO__ERRNO: declare double @ldexp(double noundef, i32 noundef) [[READNONE]] + // NO__ERRNO: declare double @ldexp(double noundef, i32 noundef) [[READNONE:#[0-9]+]] // NO__ERRNO: declare float @ldexpf(float noundef, i32 noundef) 
[[READNONE]] // NO__ERRNO: declare x86_fp80 @ldexpl(x86_fp80 noundef, i32 noundef) [[READNONE]] // HAS_ERRNO: declare double @ldexp(double noundef, i32 noundef) [[NOT_READNONE]] @@ -719,9 +719,9 @@ void foo(double *d, float f, float *fp, long double *l, int *i, const char *c) { // HAS_ERRNO: declare x86_fp80 @llvm.trunc.f80(x86_fp80) [[READNONE_INTRINSIC]] }; -// NO__ERRNO: attributes [[READNONE]] = { {{.*}}memory(none){{.*}} } // NO__ERRNO: attributes [[READNONE_INTRINSIC]] = { {{.*}}memory(none){{.*}} } // NO__ERRNO: attributes [[NOT_READNONE]] = { nounwind {{.*}} } +// NO__ERRNO: attributes [[READNONE]] = { {{.*}}memory(none){{.*}} } // NO__ERRNO: attributes [[READONLY]] = { {{.*}}memory(read){{.*}} } // HAS_ERRNO: attributes [[NOT_READNONE]] = { nounwind {{.*}} } diff --git a/clang/test/CodeGenCXX/builtin-calling-conv.cpp b/clang/test/CodeGenCXX/builtin-calling-conv.cpp index 7020d1e0a24144..6b1c308344e05f 100644 --- a/clang/test/CodeGenCXX/builtin-calling-conv.cpp +++ b/clang/test/CodeGenCXX/builtin-calling-conv.cpp @@ -1,8 +1,8 @@ -// RUN: %clang_cc1 -triple x86_64-linux-pc -DREDECL -emit-llvm %s -o - | FileCheck %s -check-prefix LINUX -// RUN: %clang_cc1 -triple spir-unknown-unknown -DREDECL -DSPIR -emit-llvm %s -o - | FileCheck %s -check-prefix SPIR -// RUN: %clang_cc1 -triple x86_64-linux-pc -emit-llvm %s -o - | FileCheck %s -check-prefix LINUX -// RUN: %clang_cc1 -triple spir-unknown-unknown -DSPIR -emit-llvm %s -o - | FileCheck %s -check-prefix SPIR -// RUN: %clang_cc1 -triple i386-windows-pc -fdefault-calling-conv=stdcall -emit-llvm %s -o - | FileCheck %s -check-prefix WIN32 +// RUN: %clang_cc1 -triple x86_64-linux-pc -DREDECL -emit-llvm -fmath-errno %s -o - | FileCheck %s -check-prefix LINUX +// RUN: %clang_cc1 -triple spir-unknown-unknown -DREDECL -DSPIR -emit-llvm -fmath-errno %s -o - | FileCheck %s -check-prefix SPIR +// RUN: %clang_cc1 -triple x86_64-linux-pc -emit-llvm -fmath-errno %s -o - | FileCheck %s -check-prefix LINUX +// RUN: %clang_cc1 
-triple spir-unknown-unknown -DSPIR -emit-llvm -fmath-errno %s -o - | FileCheck %s -check-prefix SPIR +// RUN: %clang_cc1 -triple i386-windows-pc -fdefault-calling-conv=stdcall -emit-llvm -fmath-errno %s -o - | FileCheck %s -check-prefix WIN32 #ifdef REDECL namespace std { diff --git a/clang/test/CodeGenOpenCL/builtins-f16.cl b/clang/test/CodeGenOpenCL/builtins-f16.cl index 8150bc1ac9e2d7..e8b62fe0830cdb 100644 --- a/clang/test/CodeGenOpenCL/builtins-f16.cl +++ b/clang/test/CodeGenOpenCL/builtins-f16.cl @@ -15,6 +15,9 @@ void test_half_builtins(half h0, half h1, half h2, int i0) { // CHECK: call half @llvm.atan.f16(half %h0) res = __builtin_atanf16(h0); + // CHECK: call half @llvm.atan2.f16(half %h0, half %h1) + res = __builtin_atan2f16(h0, h1); + // CHECK: call half @llvm.copysign.f16(half %h0, half %h1) res = __builtin_copysignf16(h0, h1); >From 75c5d9794392f32eb4c4f29fb9fcd46203a64ea2 Mon Sep 17 00:00:00 2001 From: Tex Riddell <t...@microsoft.com> Date: Tue, 15 Oct 2024 19:59:01 -0700 Subject: [PATCH 2/6] Use erff instead of atan2 for builtin calling convention check --- .../test/CodeGenCXX/builtin-calling-conv.cpp | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/clang/test/CodeGenCXX/builtin-calling-conv.cpp b/clang/test/CodeGenCXX/builtin-calling-conv.cpp index 6b1c308344e05f..92d698b43be0ab 100644 --- a/clang/test/CodeGenCXX/builtin-calling-conv.cpp +++ b/clang/test/CodeGenCXX/builtin-calling-conv.cpp @@ -1,8 +1,8 @@ -// RUN: %clang_cc1 -triple x86_64-linux-pc -DREDECL -emit-llvm -fmath-errno %s -o - | FileCheck %s -check-prefix LINUX -// RUN: %clang_cc1 -triple spir-unknown-unknown -DREDECL -DSPIR -emit-llvm -fmath-errno %s -o - | FileCheck %s -check-prefix SPIR -// RUN: %clang_cc1 -triple x86_64-linux-pc -emit-llvm -fmath-errno %s -o - | FileCheck %s -check-prefix LINUX -// RUN: %clang_cc1 -triple spir-unknown-unknown -DSPIR -emit-llvm -fmath-errno %s -o - | FileCheck %s -check-prefix SPIR -// RUN: %clang_cc1 -triple 
i386-windows-pc -fdefault-calling-conv=stdcall -emit-llvm -fmath-errno %s -o - | FileCheck %s -check-prefix WIN32 +// RUN: %clang_cc1 -triple x86_64-linux-pc -DREDECL -emit-llvm %s -o - | FileCheck %s -check-prefix LINUX +// RUN: %clang_cc1 -triple spir-unknown-unknown -DREDECL -DSPIR -emit-llvm %s -o - | FileCheck %s -check-prefix SPIR +// RUN: %clang_cc1 -triple x86_64-linux-pc -emit-llvm %s -o - | FileCheck %s -check-prefix LINUX +// RUN: %clang_cc1 -triple spir-unknown-unknown -DSPIR -emit-llvm %s -o - | FileCheck %s -check-prefix SPIR +// RUN: %clang_cc1 -triple i386-windows-pc -fdefault-calling-conv=stdcall -emit-llvm %s -o - | FileCheck %s -check-prefix WIN32 #ifdef REDECL namespace std { @@ -13,7 +13,7 @@ using size_t = unsigned long; #endif // SPIR } // namespace std -float __builtin_atan2f(float, float); +float __builtin_erff(float); void *operator new(std::size_t); #endif // REDECL @@ -22,32 +22,32 @@ void foo(); void user() { int i; ::operator new(5); - (void)__builtin_atan2f(1.1, 2.2); + (void)__builtin_erff(1.1); foo(); } // LINUX: define{{.*}} void @_Z4userv() // LINUX: call noalias noundef nonnull ptr @_Znwm -// LINUX: call float @atan2f +// LINUX: call float @erff // LINUX: call void @_Z3foov // LINUX: declare noundef nonnull ptr @_Znwm(i64 noundef) -// LINUX: declare float @atan2f(float noundef, float noundef) +// LINUX: declare float @erff(float noundef) // LINUX: declare void @_Z3foov() // SPIR: define{{.*}} spir_func void @_Z4userv() // SPIR: call spir_func noalias noundef nonnull ptr @_Znwj -// SPIR: call spir_func float @atan2f +// SPIR: call spir_func float @erff // SPIR: call spir_func void @_Z3foov // SPIR: declare spir_func noundef nonnull ptr @_Znwj(i32 noundef) -// SPIR: declare spir_func float @atan2f(float noundef, float noundef) +// SPIR: declare spir_func float @erff(float noundef) // SPIR: declare spir_func void @_Z3foov() // Note: Windows /G options should not change the platform default calling // convention of builtins. 
// WIN32: define dso_local x86_stdcallcc void @"?user@@YGXXZ"() // WIN32: call noalias noundef nonnull ptr @"??2@YAPAXI@Z" -// WIN32: call float @atan2f +// WIN32: call float @erff // WIN32: call x86_stdcallcc void @"?foo@@YGXXZ" // WIN32: declare dso_local noundef nonnull ptr @"??2@YAPAXI@Z"( -// WIN32: declare dso_local float @atan2f(float noundef, float noundef) +// WIN32: declare dso_local float @erff(float noundef) // WIN32: declare dso_local x86_stdcallcc void @"?foo@@YGXXZ"() >From 64cade4652dcca48b353619948e78834aeb5f991 Mon Sep 17 00:00:00 2001 From: Tex Riddell <t...@microsoft.com> Date: Thu, 24 Oct 2024 20:00:06 -0700 Subject: [PATCH 3/6] clang-format --- clang/lib/CodeGen/CGBuiltin.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 9b63fcbedc8c45..59ecc5c0b211a6 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -2807,7 +2807,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_atan2l: case Builtin::BI__builtin_atan2f128: return RValue::get(emitBinaryMaybeConstrainedFPBuiltin( - *this, E, Intrinsic::atan2, Intrinsic::experimental_constrained_atan2)); + *this, E, Intrinsic::atan2, + Intrinsic::experimental_constrained_atan2)); case Builtin::BIceil: case Builtin::BIceilf: >From f4ded92f66fbad449c31a7ed1729214022d80e58 Mon Sep 17 00:00:00 2001 From: Tex Riddell <t...@microsoft.com> Date: Mon, 11 Nov 2024 16:33:39 -0800 Subject: [PATCH 4/6] Update tests for constrained intrinsics on PowerPC, RISCV, SystemZ, ARM --- llvm/test/CodeGen/ARM/fp-intrinsics.ll | 16 + .../CodeGen/PowerPC/ctrloop-constrained-fp.ll | 57 +++ .../CodeGen/RISCV/double-intrinsics-strict.ll | 60 +++ .../vector-constrained-fp-intrinsics.ll | 391 ++++++++++++++++++ 4 files changed, 524 insertions(+) diff --git a/llvm/test/CodeGen/ARM/fp-intrinsics.ll b/llvm/test/CodeGen/ARM/fp-intrinsics.ll index 
ca2dc701bd1fb3..93b6a58a22b6ce 100644 --- a/llvm/test/CodeGen/ARM/fp-intrinsics.ll +++ b/llvm/test/CodeGen/ARM/fp-intrinsics.ll @@ -146,6 +146,13 @@ define float @tan_f32(float %x) #0 { ret float %val } +; CHECK-LABEL: atan2_f32: +; CHECK: bl atan2f +define float @atan2_f32(float %x, float %y) #0 { + %val = call float @llvm.experimental.constrained.atan2.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret float %val +} + ; CHECK-LABEL: pow_f32: ; CHECK: bl powf define float @pow_f32(float %x, float %y) #0 { @@ -610,6 +617,13 @@ define double @tan_f64(double %x) #0 { ret double %val } +; CHECK-LABEL: atan2_f64: +; CHECK: bl atan2 +define double @atan2_f64(double %x, double %y) #0 { + %val = call double @llvm.experimental.constrained.atan2.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret double %val +} + ; CHECK-LABEL: pow_f64: ; CHECK: bl pow define double @pow_f64(double %x, double %y) #0 { @@ -1038,6 +1052,7 @@ declare float @llvm.experimental.constrained.powi.f32(float, i32, metadata, meta declare float @llvm.experimental.constrained.sin.f32(float, metadata, metadata) declare float @llvm.experimental.constrained.cos.f32(float, metadata, metadata) declare float @llvm.experimental.constrained.tan.f32(float, metadata, metadata) +declare float @llvm.experimental.constrained.atan2.f32(float, float, metadata, metadata) declare float @llvm.experimental.constrained.pow.f32(float, float, metadata, metadata) declare float @llvm.experimental.constrained.log.f32(float, metadata, metadata) declare float @llvm.experimental.constrained.log10.f32(float, metadata, metadata) @@ -1072,6 +1087,7 @@ declare double @llvm.experimental.constrained.powi.f64(double, i32, metadata, me declare double @llvm.experimental.constrained.sin.f64(double, metadata, metadata) declare double @llvm.experimental.constrained.cos.f64(double, metadata, metadata) declare double @llvm.experimental.constrained.tan.f64(double, 
metadata, metadata) +declare double @llvm.experimental.constrained.atan2.f64(double, double, metadata, metadata) declare double @llvm.experimental.constrained.pow.f64(double, double, metadata, metadata) declare double @llvm.experimental.constrained.log.f64(double, metadata, metadata) declare double @llvm.experimental.constrained.log10.f64(double, metadata, metadata) diff --git a/llvm/test/CodeGen/PowerPC/ctrloop-constrained-fp.ll b/llvm/test/CodeGen/PowerPC/ctrloop-constrained-fp.ll index 402ecb763d5b33..7966f8c0a93ec8 100644 --- a/llvm/test/CodeGen/PowerPC/ctrloop-constrained-fp.ll +++ b/llvm/test/CodeGen/PowerPC/ctrloop-constrained-fp.ll @@ -132,6 +132,63 @@ for.body: br i1 %cond, label %exit, label %for.body } +; Check constrained ops converted to call +define void @testAtan2(ptr %cast1, ptr %cast2) strictfp { +; CHECK-LABEL: testAtan2: +; CHECK: # %bb.0: # %root +; CHECK-NEXT: mflr 0 +; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: .cfi_offset r28, -32 +; CHECK-NEXT: .cfi_offset r29, -24 +; CHECK-NEXT: .cfi_offset r30, -16 +; CHECK-NEXT: std 28, -32(1) # 8-byte Folded Spill +; CHECK-NEXT: std 29, -24(1) # 8-byte Folded Spill +; CHECK-NEXT: std 30, -16(1) # 8-byte Folded Spill +; CHECK-NEXT: stdu 1, -64(1) +; CHECK-NEXT: addi 30, 3, -8 +; CHECK-NEXT: addi 29, 4, -8 +; CHECK-NEXT: li 28, 255 +; CHECK-NEXT: std 0, 80(1) +; CHECK-NEXT: .p2align 5 +; CHECK-NEXT: .LBB3_1: # %for.body +; CHECK-NEXT: # +; CHECK-NEXT: lfdu 2, 8(29) +; CHECK-NEXT: lfdu 1, 8(30) +; CHECK-NEXT: bl atan2 +; CHECK-NEXT: nop +; CHECK-NEXT: addi 28, 28, -1 +; CHECK-NEXT: stfd 1, 0(30) +; CHECK-NEXT: cmpldi 28, 0 +; CHECK-NEXT: bc 12, 1, .LBB3_1 +; CHECK-NEXT: # %bb.2: # %exit +; CHECK-NEXT: addi 1, 1, 64 +; CHECK-NEXT: ld 0, 16(1) +; CHECK-NEXT: ld 30, -16(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 29, -24(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 28, -32(1) # 8-byte Folded Reload +; CHECK-NEXT: mtlr 0 +; CHECK-NEXT: blr +root: + br label %for.body + 
+exit: + ret void + +for.body: + %i = phi i64 [ 0, %root ], [ %next, %for.body ] + %idx1 = getelementptr inbounds double, ptr %cast1, i64 %i + %idx2 = getelementptr inbounds double, ptr %cast2, i64 %i + %val1 = load double, ptr %idx1 + %val2 = load double, ptr %idx2 + %tan = tail call nnan ninf nsz arcp double @llvm.experimental.constrained.atan2.f64(double %val1, double %val2, metadata !"round.dynamic", metadata !"fpexcept.strict") + store double %tan, ptr %idx1, align 8 + %next = add nuw nsw i64 %i, 1 + %cond = icmp eq i64 %next, 255 + br i1 %cond, label %exit, label %for.body +} + declare double @llvm.experimental.constrained.cos.f64(double, metadata, metadata) declare double @llvm.experimental.constrained.tan.f64(double, metadata, metadata) +declare double @llvm.experimental.constrained.atan2.f64(double, double, metadata, metadata) declare double @llvm.experimental.constrained.sqrt.f64(double, metadata, metadata) diff --git a/llvm/test/CodeGen/RISCV/double-intrinsics-strict.ll b/llvm/test/CodeGen/RISCV/double-intrinsics-strict.ll index 7e5ea173e52295..3adc46143f9f20 100644 --- a/llvm/test/CodeGen/RISCV/double-intrinsics-strict.ll +++ b/llvm/test/CodeGen/RISCV/double-intrinsics-strict.ll @@ -1635,3 +1635,63 @@ define i64 @llround_f64(double %a) nounwind strictfp { %1 = call i64 @llvm.experimental.constrained.llround.i64.f64(double %a, metadata !"fpexcept.strict") strictfp ret i64 %1 } + +declare double @llvm.experimental.constrained.atan2.f64(double, double, metadata, metadata) + +define double @atan2_f64(double %a, double %b) nounwind strictfp { +; RV32IFD-LABEL: atan2_f64: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: call atan2 +; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: atan2_f64: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: addi sp, sp, -16 +; RV64IFD-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; 
RV64IFD-NEXT: call atan2 +; RV64IFD-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IFD-NEXT: addi sp, sp, 16 +; RV64IFD-NEXT: ret +; +; RV32IZFINXZDINX-LABEL: atan2_f64: +; RV32IZFINXZDINX: # %bb.0: +; RV32IZFINXZDINX-NEXT: addi sp, sp, -16 +; RV32IZFINXZDINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFINXZDINX-NEXT: call atan2 +; RV32IZFINXZDINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFINXZDINX-NEXT: addi sp, sp, 16 +; RV32IZFINXZDINX-NEXT: ret +; +; RV64IZFINXZDINX-LABEL: atan2_f64: +; RV64IZFINXZDINX: # %bb.0: +; RV64IZFINXZDINX-NEXT: addi sp, sp, -16 +; RV64IZFINXZDINX-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFINXZDINX-NEXT: call atan2 +; RV64IZFINXZDINX-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IZFINXZDINX-NEXT: addi sp, sp, 16 +; RV64IZFINXZDINX-NEXT: ret +; +; RV32I-LABEL: atan2_f64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call atan2 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: atan2_f64: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call atan2 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call double @llvm.experimental.constrained.atan2.f64(double %a, double %b, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret double %1 +} diff --git a/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll index 4a109ee96a3d3e..edf818ab95131c 100644 --- a/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll +++ b/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll @@ -6539,6 +6539,392 @@ entry: ret <4 x double> %tan } +define <1 x float> @constrained_vector_atan2_v1f32() #0 { +; S390X-LABEL: constrained_vector_atan2_v1f32: +; S390X: # %bb.0: # 
%entry +; S390X-NEXT: stmg %r14, %r15, 112(%r15) +; S390X-NEXT: .cfi_offset %r14, -48 +; S390X-NEXT: .cfi_offset %r15, -40 +; S390X-NEXT: aghi %r15, -160 +; S390X-NEXT: .cfi_def_cfa_offset 320 +; S390X-NEXT: larl %r1, .LCPI124_0 +; S390X-NEXT: le %f0, 0(%r1) +; S390X-NEXT: larl %r1, .LCPI124_1 +; S390X-NEXT: le %f2, 0(%r1) +; S390X-NEXT: brasl %r14, atan2f@PLT +; S390X-NEXT: lmg %r14, %r15, 272(%r15) +; S390X-NEXT: br %r14 +; +; SZ13-LABEL: constrained_vector_atan2_v1f32: +; SZ13: # %bb.0: # %entry +; SZ13-NEXT: stmg %r14, %r15, 112(%r15) +; SZ13-NEXT: .cfi_offset %r14, -48 +; SZ13-NEXT: .cfi_offset %r15, -40 +; SZ13-NEXT: aghi %r15, -160 +; SZ13-NEXT: .cfi_def_cfa_offset 320 +; SZ13-NEXT: larl %r1, .LCPI124_0 +; SZ13-NEXT: lde %f0, 0(%r1) +; SZ13-NEXT: larl %r1, .LCPI124_1 +; SZ13-NEXT: lde %f2, 0(%r1) +; SZ13-NEXT: brasl %r14, atan2f@PLT +; SZ13-NEXT: # kill: def $f0s killed $f0s def $v0 +; SZ13-NEXT: vlr %v24, %v0 +; SZ13-NEXT: lmg %r14, %r15, 272(%r15) +; SZ13-NEXT: br %r14 +entry: + %atan2 = call <1 x float> @llvm.experimental.constrained.atan2.v1f32( + <1 x float> <float 42.0>, + <1 x float> <float 43.0>, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <1 x float> %atan2 +} + +define <2 x double> @constrained_vector_atan2_v2f64() #0 { +; S390X-LABEL: constrained_vector_atan2_v2f64: +; S390X: # %bb.0: # %entry +; S390X-NEXT: stmg %r14, %r15, 112(%r15) +; S390X-NEXT: .cfi_offset %r14, -48 +; S390X-NEXT: .cfi_offset %r15, -40 +; S390X-NEXT: aghi %r15, -168 +; S390X-NEXT: .cfi_def_cfa_offset 328 +; S390X-NEXT: std %f8, 160(%r15) # 8-byte Folded Spill +; S390X-NEXT: .cfi_offset %f8, -168 +; S390X-NEXT: larl %r1, .LCPI125_0 +; S390X-NEXT: ld %f0, 0(%r1) +; S390X-NEXT: larl %r1, .LCPI125_1 +; S390X-NEXT: ld %f2, 0(%r1) +; S390X-NEXT: brasl %r14, atan2@PLT +; S390X-NEXT: larl %r1, .LCPI125_2 +; S390X-NEXT: ld %f1, 0(%r1) +; S390X-NEXT: larl %r1, .LCPI125_3 +; S390X-NEXT: ld %f2, 0(%r1) +; S390X-NEXT: ldr %f8, %f0 +; S390X-NEXT: ldr %f0, %f1 +; 
S390X-NEXT: brasl %r14, atan2@PLT +; S390X-NEXT: ldr %f2, %f8 +; S390X-NEXT: ld %f8, 160(%r15) # 8-byte Folded Reload +; S390X-NEXT: lmg %r14, %r15, 280(%r15) +; S390X-NEXT: br %r14 +; +; SZ13-LABEL: constrained_vector_atan2_v2f64: +; SZ13: # %bb.0: # %entry +; SZ13-NEXT: stmg %r14, %r15, 112(%r15) +; SZ13-NEXT: .cfi_offset %r14, -48 +; SZ13-NEXT: .cfi_offset %r15, -40 +; SZ13-NEXT: aghi %r15, -176 +; SZ13-NEXT: .cfi_def_cfa_offset 336 +; SZ13-NEXT: larl %r1, .LCPI125_0 +; SZ13-NEXT: ld %f0, 0(%r1) +; SZ13-NEXT: larl %r1, .LCPI125_1 +; SZ13-NEXT: ld %f2, 0(%r1) +; SZ13-NEXT: brasl %r14, atan2@PLT +; SZ13-NEXT: larl %r1, .LCPI125_2 +; SZ13-NEXT: # kill: def $f0d killed $f0d def $v0 +; SZ13-NEXT: vst %v0, 160(%r15), 3 # 16-byte Folded Spill +; SZ13-NEXT: ld %f0, 0(%r1) +; SZ13-NEXT: larl %r1, .LCPI125_3 +; SZ13-NEXT: ld %f2, 0(%r1) +; SZ13-NEXT: brasl %r14, atan2@PLT +; SZ13-NEXT: vl %v1, 160(%r15), 3 # 16-byte Folded Reload +; SZ13-NEXT: # kill: def $f0d killed $f0d def $v0 +; SZ13-NEXT: vmrhg %v24, %v0, %v1 +; SZ13-NEXT: lmg %r14, %r15, 288(%r15) +; SZ13-NEXT: br %r14 +entry: + %atan2 = call <2 x double> @llvm.experimental.constrained.atan2.v2f64( + <2 x double> <double 42.0, double 42.1>, + <2 x double> <double 43.0, double 43.1>, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <2 x double> %atan2 +} + +define <3 x float> @constrained_vector_atan2_v3f32() #0 { +; S390X-LABEL: constrained_vector_atan2_v3f32: +; S390X: # %bb.0: # %entry +; S390X-NEXT: stmg %r14, %r15, 112(%r15) +; S390X-NEXT: .cfi_offset %r14, -48 +; S390X-NEXT: .cfi_offset %r15, -40 +; S390X-NEXT: aghi %r15, -176 +; S390X-NEXT: .cfi_def_cfa_offset 336 +; S390X-NEXT: std %f8, 168(%r15) # 8-byte Folded Spill +; S390X-NEXT: std %f9, 160(%r15) # 8-byte Folded Spill +; S390X-NEXT: .cfi_offset %f8, -168 +; S390X-NEXT: .cfi_offset %f9, -176 +; S390X-NEXT: larl %r1, .LCPI126_0 +; S390X-NEXT: le %f0, 0(%r1) +; S390X-NEXT: larl %r1, .LCPI126_1 +; S390X-NEXT: le %f2, 0(%r1) +; S390X-NEXT: 
brasl %r14, atan2f@PLT +; S390X-NEXT: larl %r1, .LCPI126_2 +; S390X-NEXT: le %f1, 0(%r1) +; S390X-NEXT: larl %r1, .LCPI126_3 +; S390X-NEXT: le %f2, 0(%r1) +; S390X-NEXT: ler %f8, %f0 +; S390X-NEXT: ler %f0, %f1 +; S390X-NEXT: brasl %r14, atan2f@PLT +; S390X-NEXT: larl %r1, .LCPI126_4 +; S390X-NEXT: le %f1, 0(%r1) +; S390X-NEXT: larl %r1, .LCPI126_5 +; S390X-NEXT: le %f2, 0(%r1) +; S390X-NEXT: ler %f9, %f0 +; S390X-NEXT: ler %f0, %f1 +; S390X-NEXT: brasl %r14, atan2f@PLT +; S390X-NEXT: ler %f2, %f9 +; S390X-NEXT: ler %f4, %f8 +; S390X-NEXT: ld %f8, 168(%r15) # 8-byte Folded Reload +; S390X-NEXT: ld %f9, 160(%r15) # 8-byte Folded Reload +; S390X-NEXT: lmg %r14, %r15, 288(%r15) +; S390X-NEXT: br %r14 +; +; SZ13-LABEL: constrained_vector_atan2_v3f32: +; SZ13: # %bb.0: # %entry +; SZ13-NEXT: stmg %r14, %r15, 112(%r15) +; SZ13-NEXT: .cfi_offset %r14, -48 +; SZ13-NEXT: .cfi_offset %r15, -40 +; SZ13-NEXT: aghi %r15, -192 +; SZ13-NEXT: .cfi_def_cfa_offset 352 +; SZ13-NEXT: larl %r1, .LCPI126_0 +; SZ13-NEXT: lde %f0, 0(%r1) +; SZ13-NEXT: larl %r1, .LCPI126_1 +; SZ13-NEXT: lde %f2, 0(%r1) +; SZ13-NEXT: brasl %r14, atan2f@PLT +; SZ13-NEXT: larl %r1, .LCPI126_2 +; SZ13-NEXT: # kill: def $f0s killed $f0s def $v0 +; SZ13-NEXT: vst %v0, 176(%r15), 3 # 16-byte Folded Spill +; SZ13-NEXT: lde %f0, 0(%r1) +; SZ13-NEXT: larl %r1, .LCPI126_3 +; SZ13-NEXT: lde %f2, 0(%r1) +; SZ13-NEXT: brasl %r14, atan2f@PLT +; SZ13-NEXT: larl %r1, .LCPI126_4 +; SZ13-NEXT: # kill: def $f0s killed $f0s def $v0 +; SZ13-NEXT: vst %v0, 160(%r15), 3 # 16-byte Folded Spill +; SZ13-NEXT: lde %f0, 0(%r1) +; SZ13-NEXT: larl %r1, .LCPI126_5 +; SZ13-NEXT: lde %f2, 0(%r1) +; SZ13-NEXT: brasl %r14, atan2f@PLT +; SZ13-NEXT: vl %v1, 160(%r15), 3 # 16-byte Folded Reload +; SZ13-NEXT: # kill: def $f0s killed $f0s def $v0 +; SZ13-NEXT: vmrhf %v0, %v1, %v0 +; SZ13-NEXT: vl %v1, 176(%r15), 3 # 16-byte Folded Reload +; SZ13-NEXT: vrepf %v1, %v1, 0 +; SZ13-NEXT: vmrhg %v24, %v0, %v1 +; SZ13-NEXT: lmg %r14, %r15, 304(%r15) +; 
SZ13-NEXT: br %r14 +entry: + %atan2 = call <3 x float> @llvm.experimental.constrained.atan2.v3f32( + <3 x float> <float 42.0, float 43.0, float 44.0>, + <3 x float> <float 42.125, float 43.25, float 44.375>, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <3 x float> %atan2 +} + +define void @constrained_vector_atan2_v3f64(ptr %a, ptr %b) #0 { +; S390X-LABEL: constrained_vector_atan2_v3f64: +; S390X: # %bb.0: # %entry +; S390X-NEXT: stmg %r13, %r15, 104(%r15) +; S390X-NEXT: .cfi_offset %r13, -56 +; S390X-NEXT: .cfi_offset %r14, -48 +; S390X-NEXT: .cfi_offset %r15, -40 +; S390X-NEXT: aghi %r15, -200 +; S390X-NEXT: .cfi_def_cfa_offset 360 +; S390X-NEXT: std %f8, 192(%r15) # 8-byte Folded Spill +; S390X-NEXT: std %f9, 184(%r15) # 8-byte Folded Spill +; S390X-NEXT: std %f10, 176(%r15) # 8-byte Folded Spill +; S390X-NEXT: std %f11, 168(%r15) # 8-byte Folded Spill +; S390X-NEXT: std %f12, 160(%r15) # 8-byte Folded Spill +; S390X-NEXT: .cfi_offset %f8, -168 +; S390X-NEXT: .cfi_offset %f9, -176 +; S390X-NEXT: .cfi_offset %f10, -184 +; S390X-NEXT: .cfi_offset %f11, -192 +; S390X-NEXT: .cfi_offset %f12, -200 +; S390X-NEXT: lgr %r13, %r2 +; S390X-NEXT: ld %f8, 0(%r2) +; S390X-NEXT: ld %f9, 8(%r2) +; S390X-NEXT: ld %f0, 16(%r2) +; S390X-NEXT: ld %f10, 0(%r3) +; S390X-NEXT: ld %f2, 16(%r3) +; S390X-NEXT: ld %f11, 8(%r3) +; S390X-NEXT: brasl %r14, atan2@PLT +; S390X-NEXT: ldr %f12, %f0 +; S390X-NEXT: ldr %f0, %f9 +; S390X-NEXT: ldr %f2, %f11 +; S390X-NEXT: brasl %r14, atan2@PLT +; S390X-NEXT: ldr %f9, %f0 +; S390X-NEXT: ldr %f0, %f8 +; S390X-NEXT: ldr %f2, %f10 +; S390X-NEXT: brasl %r14, atan2@PLT +; S390X-NEXT: std %f0, 0(%r13) +; S390X-NEXT: std %f9, 8(%r13) +; S390X-NEXT: std %f12, 16(%r13) +; S390X-NEXT: ld %f8, 192(%r15) # 8-byte Folded Reload +; S390X-NEXT: ld %f9, 184(%r15) # 8-byte Folded Reload +; S390X-NEXT: ld %f10, 176(%r15) # 8-byte Folded Reload +; S390X-NEXT: ld %f11, 168(%r15) # 8-byte Folded Reload +; S390X-NEXT: ld %f12, 160(%r15) # 8-byte 
Folded Reload +; S390X-NEXT: lmg %r13, %r15, 304(%r15) +; S390X-NEXT: br %r14 +; +; SZ13-LABEL: constrained_vector_atan2_v3f64: +; SZ13: # %bb.0: # %entry +; SZ13-NEXT: stmg %r13, %r15, 104(%r15) +; SZ13-NEXT: .cfi_offset %r13, -56 +; SZ13-NEXT: .cfi_offset %r14, -48 +; SZ13-NEXT: .cfi_offset %r15, -40 +; SZ13-NEXT: aghi %r15, -224 +; SZ13-NEXT: .cfi_def_cfa_offset 384 +; SZ13-NEXT: std %f8, 216(%r15) # 8-byte Folded Spill +; SZ13-NEXT: std %f9, 208(%r15) # 8-byte Folded Spill +; SZ13-NEXT: .cfi_offset %f8, -168 +; SZ13-NEXT: .cfi_offset %f9, -176 +; SZ13-NEXT: vl %v0, 0(%r2), 4 +; SZ13-NEXT: vl %v2, 0(%r3), 4 +; SZ13-NEXT: ld %f8, 16(%r2) +; SZ13-NEXT: ld %f9, 16(%r3) +; SZ13-NEXT: lgr %r13, %r2 +; SZ13-NEXT: vst %v0, 176(%r15), 3 # 16-byte Folded Spill +; SZ13-NEXT: vst %v2, 192(%r15), 3 # 16-byte Folded Spill +; SZ13-NEXT: # kill: def $f0d killed $f0d killed $v0 +; SZ13-NEXT: # kill: def $f2d killed $f2d killed $v2 +; SZ13-NEXT: brasl %r14, atan2@PLT +; SZ13-NEXT: # kill: def $f0d killed $f0d def $v0 +; SZ13-NEXT: vst %v0, 160(%r15), 3 # 16-byte Folded Spill +; SZ13-NEXT: vl %v0, 176(%r15), 3 # 16-byte Folded Reload +; SZ13-NEXT: vl %v1, 192(%r15), 3 # 16-byte Folded Reload +; SZ13-NEXT: vrepg %v0, %v0, 1 +; SZ13-NEXT: vrepg %v2, %v1, 1 +; SZ13-NEXT: # kill: def $f0d killed $f0d killed $v0 +; SZ13-NEXT: # kill: def $f2d killed $f2d killed $v2 +; SZ13-NEXT: brasl %r14, atan2@PLT +; SZ13-NEXT: vl %v1, 160(%r15), 3 # 16-byte Folded Reload +; SZ13-NEXT: # kill: def $f0d killed $f0d def $v0 +; SZ13-NEXT: vmrhg %v0, %v1, %v0 +; SZ13-NEXT: vst %v0, 160(%r15), 3 # 16-byte Folded Spill +; SZ13-NEXT: ldr %f0, %f8 +; SZ13-NEXT: ldr %f2, %f9 +; SZ13-NEXT: brasl %r14, atan2@PLT +; SZ13-NEXT: std %f0, 16(%r13) +; SZ13-NEXT: vl %v0, 160(%r15), 3 # 16-byte Folded Reload +; SZ13-NEXT: ld %f8, 216(%r15) # 8-byte Folded Reload +; SZ13-NEXT: ld %f9, 208(%r15) # 8-byte Folded Reload +; SZ13-NEXT: vst %v0, 0(%r13), 4 +; SZ13-NEXT: lmg %r13, %r15, 328(%r15) +; SZ13-NEXT: br %r14 
+entry: + %c = load <3 x double>, ptr %a + %d = load <3 x double>, ptr %b + %atan2 = call <3 x double> @llvm.experimental.constrained.atan2.v3f64( + <3 x double> %c, + <3 x double> %d, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + store <3 x double> %atan2, ptr %a + ret void +} + +define <4 x double> @constrained_vector_atan2_v4f64() #0 { +; S390X-LABEL: constrained_vector_atan2_v4f64: +; S390X: # %bb.0: # %entry +; S390X-NEXT: stmg %r14, %r15, 112(%r15) +; S390X-NEXT: .cfi_offset %r14, -48 +; S390X-NEXT: .cfi_offset %r15, -40 +; S390X-NEXT: aghi %r15, -184 +; S390X-NEXT: .cfi_def_cfa_offset 344 +; S390X-NEXT: std %f8, 176(%r15) # 8-byte Folded Spill +; S390X-NEXT: std %f9, 168(%r15) # 8-byte Folded Spill +; S390X-NEXT: std %f10, 160(%r15) # 8-byte Folded Spill +; S390X-NEXT: .cfi_offset %f8, -168 +; S390X-NEXT: .cfi_offset %f9, -176 +; S390X-NEXT: .cfi_offset %f10, -184 +; S390X-NEXT: larl %r1, .LCPI128_0 +; S390X-NEXT: ld %f0, 0(%r1) +; S390X-NEXT: larl %r1, .LCPI128_1 +; S390X-NEXT: ld %f2, 0(%r1) +; S390X-NEXT: brasl %r14, atan2@PLT +; S390X-NEXT: larl %r1, .LCPI128_2 +; S390X-NEXT: ld %f1, 0(%r1) +; S390X-NEXT: larl %r1, .LCPI128_3 +; S390X-NEXT: ld %f2, 0(%r1) +; S390X-NEXT: ldr %f8, %f0 +; S390X-NEXT: ldr %f0, %f1 +; S390X-NEXT: brasl %r14, atan2@PLT +; S390X-NEXT: larl %r1, .LCPI128_4 +; S390X-NEXT: ld %f1, 0(%r1) +; S390X-NEXT: larl %r1, .LCPI128_5 +; S390X-NEXT: ld %f2, 0(%r1) +; S390X-NEXT: ldr %f9, %f0 +; S390X-NEXT: ldr %f0, %f1 +; S390X-NEXT: brasl %r14, atan2@PLT +; S390X-NEXT: larl %r1, .LCPI128_6 +; S390X-NEXT: ld %f1, 0(%r1) +; S390X-NEXT: larl %r1, .LCPI128_7 +; S390X-NEXT: ld %f2, 0(%r1) +; S390X-NEXT: ldr %f10, %f0 +; S390X-NEXT: ldr %f0, %f1 +; S390X-NEXT: brasl %r14, atan2@PLT +; S390X-NEXT: ldr %f2, %f10 +; S390X-NEXT: ldr %f4, %f9 +; S390X-NEXT: ldr %f6, %f8 +; S390X-NEXT: ld %f8, 176(%r15) # 8-byte Folded Reload +; S390X-NEXT: ld %f9, 168(%r15) # 8-byte Folded Reload +; S390X-NEXT: ld %f10, 160(%r15) # 8-byte Folded 
Reload +; S390X-NEXT: lmg %r14, %r15, 296(%r15) +; S390X-NEXT: br %r14 +; +; SZ13-LABEL: constrained_vector_atan2_v4f64: +; SZ13: # %bb.0: # %entry +; SZ13-NEXT: stmg %r14, %r15, 112(%r15) +; SZ13-NEXT: .cfi_offset %r14, -48 +; SZ13-NEXT: .cfi_offset %r15, -40 +; SZ13-NEXT: aghi %r15, -192 +; SZ13-NEXT: .cfi_def_cfa_offset 352 +; SZ13-NEXT: larl %r1, .LCPI128_0 +; SZ13-NEXT: ld %f0, 0(%r1) +; SZ13-NEXT: larl %r1, .LCPI128_1 +; SZ13-NEXT: ld %f2, 0(%r1) +; SZ13-NEXT: brasl %r14, atan2@PLT +; SZ13-NEXT: larl %r1, .LCPI128_2 +; SZ13-NEXT: # kill: def $f0d killed $f0d def $v0 +; SZ13-NEXT: vst %v0, 160(%r15), 3 # 16-byte Folded Spill +; SZ13-NEXT: ld %f0, 0(%r1) +; SZ13-NEXT: larl %r1, .LCPI128_3 +; SZ13-NEXT: ld %f2, 0(%r1) +; SZ13-NEXT: brasl %r14, atan2@PLT +; SZ13-NEXT: vl %v1, 160(%r15), 3 # 16-byte Folded Reload +; SZ13-NEXT: # kill: def $f0d killed $f0d def $v0 +; SZ13-NEXT: vmrhg %v0, %v0, %v1 +; SZ13-NEXT: larl %r1, .LCPI128_4 +; SZ13-NEXT: vst %v0, 160(%r15), 3 # 16-byte Folded Spill +; SZ13-NEXT: ld %f0, 0(%r1) +; SZ13-NEXT: larl %r1, .LCPI128_5 +; SZ13-NEXT: ld %f2, 0(%r1) +; SZ13-NEXT: brasl %r14, atan2@PLT +; SZ13-NEXT: larl %r1, .LCPI128_6 +; SZ13-NEXT: # kill: def $f0d killed $f0d def $v0 +; SZ13-NEXT: vst %v0, 176(%r15), 3 # 16-byte Folded Spill +; SZ13-NEXT: ld %f0, 0(%r1) +; SZ13-NEXT: larl %r1, .LCPI128_7 +; SZ13-NEXT: ld %f2, 0(%r1) +; SZ13-NEXT: brasl %r14, atan2@PLT +; SZ13-NEXT: vl %v1, 176(%r15), 3 # 16-byte Folded Reload +; SZ13-NEXT: vl %v24, 160(%r15), 3 # 16-byte Folded Reload +; SZ13-NEXT: # kill: def $f0d killed $f0d def $v0 +; SZ13-NEXT: vmrhg %v26, %v0, %v1 +; SZ13-NEXT: lmg %r14, %r15, 304(%r15) +; SZ13-NEXT: br %r14 +entry: + %atan2 = call <4 x double> @llvm.experimental.constrained.atan2.v4f64( + <4 x double> <double 42.0, double 42.1, + double 42.2, double 42.3>, + <4 x double> <double 43.0, double 43.1, + double 43.2, double 43.3>, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <4 x double> %atan2 +} + 
attributes #0 = { strictfp } declare <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double>, <2 x double>, metadata, metadata) @@ -6552,6 +6938,7 @@ declare <2 x double> @llvm.experimental.constrained.powi.v2f64(<2 x double>, i32 declare <2 x double> @llvm.experimental.constrained.sin.v2f64(<2 x double>, metadata, metadata) declare <2 x double> @llvm.experimental.constrained.cos.v2f64(<2 x double>, metadata, metadata) declare <2 x double> @llvm.experimental.constrained.tan.v2f64(<2 x double>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.atan2.v2f64(<2 x double>, <2 x double>, metadata, metadata) declare <2 x double> @llvm.experimental.constrained.exp.v2f64(<2 x double>, metadata, metadata) declare <2 x double> @llvm.experimental.constrained.exp2.v2f64(<2 x double>, metadata, metadata) declare <2 x double> @llvm.experimental.constrained.log.v2f64(<2 x double>, metadata, metadata) @@ -6579,6 +6966,7 @@ declare <1 x float> @llvm.experimental.constrained.powi.v1f32(<1 x float>, i32, declare <1 x float> @llvm.experimental.constrained.sin.v1f32(<1 x float>, metadata, metadata) declare <1 x float> @llvm.experimental.constrained.cos.v1f32(<1 x float>, metadata, metadata) declare <1 x float> @llvm.experimental.constrained.tan.v1f32(<1 x float>, metadata, metadata) +declare <1 x float> @llvm.experimental.constrained.atan2.v1f32(<1 x float>, <1 x float>, metadata, metadata) declare <1 x float> @llvm.experimental.constrained.exp.v1f32(<1 x float>, metadata, metadata) declare <1 x float> @llvm.experimental.constrained.exp2.v1f32(<1 x float>, metadata, metadata) declare <1 x float> @llvm.experimental.constrained.log.v1f32(<1 x float>, metadata, metadata) @@ -6617,6 +7005,8 @@ declare <3 x float> @llvm.experimental.constrained.cos.v3f32(<3 x float>, metada declare <3 x double> @llvm.experimental.constrained.cos.v3f64(<3 x double>, metadata, metadata) declare <3 x float> @llvm.experimental.constrained.tan.v3f32(<3 x float>, metadata, 
metadata) declare <3 x double> @llvm.experimental.constrained.tan.v3f64(<3 x double>, metadata, metadata) +declare <3 x float> @llvm.experimental.constrained.atan2.v3f32(<3 x float>, <3 x float>, metadata, metadata) +declare <3 x double> @llvm.experimental.constrained.atan2.v3f64(<3 x double>, <3 x double>, metadata, metadata) declare <3 x float> @llvm.experimental.constrained.exp.v3f32(<3 x float>, metadata, metadata) declare <3 x double> @llvm.experimental.constrained.exp.v3f64(<3 x double>, metadata, metadata) declare <3 x float> @llvm.experimental.constrained.exp2.v3f32(<3 x float>, metadata, metadata) @@ -6657,6 +7047,7 @@ declare <4 x double> @llvm.experimental.constrained.powi.v4f64(<4 x double>, i32 declare <4 x double> @llvm.experimental.constrained.sin.v4f64(<4 x double>, metadata, metadata) declare <4 x double> @llvm.experimental.constrained.cos.v4f64(<4 x double>, metadata, metadata) declare <4 x double> @llvm.experimental.constrained.tan.v4f64(<4 x double>, metadata, metadata) +declare <4 x double> @llvm.experimental.constrained.atan2.v4f64(<4 x double>, <4 x double>, metadata, metadata) declare <4 x double> @llvm.experimental.constrained.exp.v4f64(<4 x double>, metadata, metadata) declare <4 x double> @llvm.experimental.constrained.exp2.v4f64(<4 x double>, metadata, metadata) declare <4 x double> @llvm.experimental.constrained.log.v4f64(<4 x double>, metadata, metadata) >From a95d91057f17dccef80ed46dff0aafc01a176347 Mon Sep 17 00:00:00 2001 From: Tex Riddell <t...@microsoft.com> Date: Mon, 11 Nov 2024 18:14:16 -0800 Subject: [PATCH 5/6] Update more tests --- .../ppcf128-constrained-fp-intrinsics.ll | 46 ++ .../vector-constrained-fp-intrinsics.ll | 419 ++++++++++++++++++ .../CodeGen/RISCV/float-intrinsics-strict.ll | 60 +++ 3 files changed, 525 insertions(+) diff --git a/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll index 61a0fddeda33e1..c1ee436a40c557 100644 --- 
a/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll +++ b/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll @@ -2110,6 +2110,51 @@ entry: ret ppc_fp128 %tan } +define ppc_fp128 @test_atan2_ppc_fp128(ppc_fp128 %first, ppc_fp128 %second) #0 { +; PC64LE-LABEL: test_atan2_ppc_fp128: +; PC64LE: # %bb.0: # %entry +; PC64LE-NEXT: mflr 0 +; PC64LE-NEXT: stdu 1, -32(1) +; PC64LE-NEXT: std 0, 48(1) +; PC64LE-NEXT: bl atan2l +; PC64LE-NEXT: nop +; PC64LE-NEXT: addi 1, 1, 32 +; PC64LE-NEXT: ld 0, 16(1) +; PC64LE-NEXT: mtlr 0 +; PC64LE-NEXT: blr +; +; PC64LE9-LABEL: test_atan2_ppc_fp128: +; PC64LE9: # %bb.0: # %entry +; PC64LE9-NEXT: mflr 0 +; PC64LE9-NEXT: stdu 1, -32(1) +; PC64LE9-NEXT: std 0, 48(1) +; PC64LE9-NEXT: bl atan2l +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: addi 1, 1, 32 +; PC64LE9-NEXT: ld 0, 16(1) +; PC64LE9-NEXT: mtlr 0 +; PC64LE9-NEXT: blr +; +; PC64-LABEL: test_atan2_ppc_fp128: +; PC64: # %bb.0: # %entry +; PC64-NEXT: mflr 0 +; PC64-NEXT: stdu 1, -112(1) +; PC64-NEXT: std 0, 128(1) +; PC64-NEXT: bl atan2l +; PC64-NEXT: nop +; PC64-NEXT: addi 1, 1, 112 +; PC64-NEXT: ld 0, 16(1) +; PC64-NEXT: mtlr 0 +; PC64-NEXT: blr +entry: + %atan2 = call ppc_fp128 @llvm.experimental.constrained.atan2.ppcf128( + ppc_fp128 %first, + ppc_fp128 %second, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #1 + ret ppc_fp128 %atan2 +} + attributes #0 = { nounwind strictfp } attributes #1 = { strictfp } @@ -2141,6 +2186,7 @@ declare ppc_fp128 @llvm.experimental.constrained.sin.ppcf128(ppc_fp128, metadata declare ppc_fp128 @llvm.experimental.constrained.sqrt.ppcf128(ppc_fp128, metadata, metadata) declare ppc_fp128 @llvm.experimental.constrained.fsub.ppcf128(ppc_fp128, ppc_fp128, metadata, metadata) declare ppc_fp128 @llvm.experimental.constrained.tan.ppcf128(ppc_fp128, metadata, metadata) +declare ppc_fp128 @llvm.experimental.constrained.atan2.ppcf128(ppc_fp128, ppc_fp128, metadata, metadata) declare ppc_fp128 
@llvm.experimental.constrained.trunc.ppcf128(ppc_fp128, metadata) declare i64 @llvm.experimental.constrained.fptosi.i64.ppcf128(ppc_fp128, metadata) declare i32 @llvm.experimental.constrained.fptosi.i32.ppcf128(ppc_fp128, metadata) diff --git a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll index aedb1a9c65cf89..71c3069a406fe3 100644 --- a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll +++ b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll @@ -8333,6 +8333,420 @@ entry: ret <4 x double> %tan } +define <1 x float> @constrained_vector_atan2_v1f32(<1 x float> %x, <1 x float> %y) #0 { +; PC64LE-LABEL: constrained_vector_atan2_v1f32: +; PC64LE: # %bb.0: # %entry +; PC64LE-NEXT: mflr 0 +; PC64LE-NEXT: stdu 1, -32(1) +; PC64LE-NEXT: std 0, 48(1) +; PC64LE-NEXT: bl atan2f +; PC64LE-NEXT: nop +; PC64LE-NEXT: addi 1, 1, 32 +; PC64LE-NEXT: ld 0, 16(1) +; PC64LE-NEXT: mtlr 0 +; PC64LE-NEXT: blr +; +; PC64LE9-LABEL: constrained_vector_atan2_v1f32: +; PC64LE9: # %bb.0: # %entry +; PC64LE9-NEXT: mflr 0 +; PC64LE9-NEXT: stdu 1, -32(1) +; PC64LE9-NEXT: std 0, 48(1) +; PC64LE9-NEXT: bl atan2f +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: addi 1, 1, 32 +; PC64LE9-NEXT: ld 0, 16(1) +; PC64LE9-NEXT: mtlr 0 +; PC64LE9-NEXT: blr +entry: + %atan2 = call <1 x float> @llvm.experimental.constrained.atan2.v1f32( + <1 x float> %x, + <1 x float> %y, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #1 + ret <1 x float> %atan2 +} + +define <2 x double> @constrained_vector_atan2_v2f64(<2 x double> %x, <2 x double> %y) #0 { +; PC64LE-LABEL: constrained_vector_atan2_v2f64: +; PC64LE: # %bb.0: # %entry +; PC64LE-NEXT: mflr 0 +; PC64LE-NEXT: stdu 1, -96(1) +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: std 0, 112(1) +; PC64LE-NEXT: stxvd2x 61, 1, 3 # 16-byte Folded Spill +; PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: stxvd2x 62, 1, 3 # 16-byte Folded Spill +; PC64LE-NEXT: li 3, 80 +; PC64LE-NEXT: vmr 30, 
2 +; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill +; PC64LE-NEXT: vmr 31, 3 +; PC64LE-NEXT: xxlor 1, 62, 62 +; PC64LE-NEXT: xxlor 2, 63, 63 +; PC64LE-NEXT: bl atan2 +; PC64LE-NEXT: nop +; PC64LE-NEXT: xxlor 61, 1, 1 +; PC64LE-NEXT: xxswapd 1, 62 +; PC64LE-NEXT: xxswapd 2, 63 +; PC64LE-NEXT: bl atan2 +; PC64LE-NEXT: nop +; PC64LE-NEXT: li 3, 80 +; PC64LE-NEXT: xxmrghd 34, 61, 1 +; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: lxvd2x 62, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: lxvd2x 61, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: addi 1, 1, 96 +; PC64LE-NEXT: ld 0, 16(1) +; PC64LE-NEXT: mtlr 0 +; PC64LE-NEXT: blr +; +; PC64LE9-LABEL: constrained_vector_atan2_v2f64: +; PC64LE9: # %bb.0: # %entry +; PC64LE9-NEXT: mflr 0 +; PC64LE9-NEXT: stdu 1, -80(1) +; PC64LE9-NEXT: std 0, 96(1) +; PC64LE9-NEXT: stxv 62, 48(1) # 16-byte Folded Spill +; PC64LE9-NEXT: stxv 63, 64(1) # 16-byte Folded Spill +; PC64LE9-NEXT: vmr 31, 3 +; PC64LE9-NEXT: xscpsgndp 2, 63, 63 +; PC64LE9-NEXT: vmr 30, 2 +; PC64LE9-NEXT: xscpsgndp 1, 62, 62 +; PC64LE9-NEXT: stxv 61, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: bl atan2 +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: xscpsgndp 61, 1, 1 +; PC64LE9-NEXT: xxswapd 1, 62 +; PC64LE9-NEXT: xxswapd 2, 63 +; PC64LE9-NEXT: bl atan2 +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: xxmrghd 34, 61, 1 +; PC64LE9-NEXT: lxv 63, 64(1) # 16-byte Folded Reload +; PC64LE9-NEXT: lxv 62, 48(1) # 16-byte Folded Reload +; PC64LE9-NEXT: lxv 61, 32(1) # 16-byte Folded Reload +; PC64LE9-NEXT: addi 1, 1, 80 +; PC64LE9-NEXT: ld 0, 16(1) +; PC64LE9-NEXT: mtlr 0 +; PC64LE9-NEXT: blr +entry: + %atan2 = call <2 x double> @llvm.experimental.constrained.atan2.v2f64( + <2 x double> %x, + <2 x double> %y, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #1 + ret <2 x double> %atan2 +} + +define <3 x float> @constrained_vector_atan2_v3f32(<3 x float> %x, <3 x float> %y) #0 { +; PC64LE-LABEL: 
constrained_vector_atan2_v3f32: +; PC64LE: # %bb.0: # %entry +; PC64LE-NEXT: mflr 0 +; PC64LE-NEXT: stdu 1, -96(1) +; PC64LE-NEXT: xxsldwi 0, 34, 34, 1 +; PC64LE-NEXT: xxsldwi 2, 35, 35, 1 +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: std 0, 112(1) +; PC64LE-NEXT: stfd 30, 80(1) # 8-byte Folded Spill +; PC64LE-NEXT: stfd 31, 88(1) # 8-byte Folded Spill +; PC64LE-NEXT: xscvspdpn 1, 0 +; PC64LE-NEXT: xscvspdpn 2, 2 +; PC64LE-NEXT: stxvd2x 62, 1, 3 # 16-byte Folded Spill +; PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: vmr 30, 2 +; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill +; PC64LE-NEXT: vmr 31, 3 +; PC64LE-NEXT: bl atan2f +; PC64LE-NEXT: nop +; PC64LE-NEXT: xxswapd 0, 62 +; PC64LE-NEXT: xxswapd 2, 63 +; PC64LE-NEXT: fmr 31, 1 +; PC64LE-NEXT: xscvspdpn 1, 0 +; PC64LE-NEXT: xscvspdpn 2, 2 +; PC64LE-NEXT: bl atan2f +; PC64LE-NEXT: nop +; PC64LE-NEXT: xxsldwi 0, 62, 62, 3 +; PC64LE-NEXT: xxsldwi 2, 63, 63, 3 +; PC64LE-NEXT: fmr 30, 1 +; PC64LE-NEXT: xscvspdpn 1, 0 +; PC64LE-NEXT: xscvspdpn 2, 2 +; PC64LE-NEXT: bl atan2f +; PC64LE-NEXT: nop +; PC64LE-NEXT: xscvdpspn 0, 1 +; PC64LE-NEXT: xscvdpspn 1, 30 +; PC64LE-NEXT: addis 3, 2, .LCPI194_0@toc@ha +; PC64LE-NEXT: lfd 30, 80(1) # 8-byte Folded Reload +; PC64LE-NEXT: xscvdpspn 36, 31 +; PC64LE-NEXT: lfd 31, 88(1) # 8-byte Folded Reload +; PC64LE-NEXT: addi 3, 3, .LCPI194_0@toc@l +; PC64LE-NEXT: xxmrghw 34, 1, 0 +; PC64LE-NEXT: lxvd2x 0, 0, 3 +; PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: lxvd2x 62, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: xxswapd 35, 0 +; PC64LE-NEXT: vperm 2, 4, 2, 3 +; PC64LE-NEXT: addi 1, 1, 96 +; PC64LE-NEXT: ld 0, 16(1) +; PC64LE-NEXT: mtlr 0 +; PC64LE-NEXT: blr +; +; PC64LE9-LABEL: constrained_vector_atan2_v3f32: +; PC64LE9: # %bb.0: # %entry +; PC64LE9-NEXT: mflr 0 +; PC64LE9-NEXT: stdu 1, -80(1) +; PC64LE9-NEXT: xxsldwi 0, 34, 34, 1 +; PC64LE9-NEXT: std 0, 96(1) +; PC64LE9-NEXT: stfd 30, 64(1) # 8-byte Folded Spill +; 
PC64LE9-NEXT: stxv 62, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: stfd 31, 72(1) # 8-byte Folded Spill +; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill +; PC64LE9-NEXT: xscvspdpn 1, 0 +; PC64LE9-NEXT: xxsldwi 0, 35, 35, 1 +; PC64LE9-NEXT: vmr 31, 3 +; PC64LE9-NEXT: vmr 30, 2 +; PC64LE9-NEXT: xscvspdpn 2, 0 +; PC64LE9-NEXT: bl atan2f +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: xxswapd 0, 62 +; PC64LE9-NEXT: fmr 31, 1 +; PC64LE9-NEXT: xscvspdpn 1, 0 +; PC64LE9-NEXT: xxswapd 0, 63 +; PC64LE9-NEXT: xscvspdpn 2, 0 +; PC64LE9-NEXT: bl atan2f +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: xxsldwi 0, 62, 62, 3 +; PC64LE9-NEXT: fmr 30, 1 +; PC64LE9-NEXT: xscvspdpn 1, 0 +; PC64LE9-NEXT: xxsldwi 0, 63, 63, 3 +; PC64LE9-NEXT: xscvspdpn 2, 0 +; PC64LE9-NEXT: bl atan2f +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: xscvdpspn 0, 1 +; PC64LE9-NEXT: xscvdpspn 1, 30 +; PC64LE9-NEXT: addis 3, 2, .LCPI194_0@toc@ha +; PC64LE9-NEXT: xscvdpspn 34, 31 +; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload +; PC64LE9-NEXT: lxv 62, 32(1) # 16-byte Folded Reload +; PC64LE9-NEXT: lfd 31, 72(1) # 8-byte Folded Reload +; PC64LE9-NEXT: addi 3, 3, .LCPI194_0@toc@l +; PC64LE9-NEXT: lfd 30, 64(1) # 8-byte Folded Reload +; PC64LE9-NEXT: xxmrghw 35, 1, 0 +; PC64LE9-NEXT: lxv 0, 0(3) +; PC64LE9-NEXT: xxperm 34, 35, 0 +; PC64LE9-NEXT: addi 1, 1, 80 +; PC64LE9-NEXT: ld 0, 16(1) +; PC64LE9-NEXT: mtlr 0 +; PC64LE9-NEXT: blr +entry: + %atan2 = call <3 x float> @llvm.experimental.constrained.atan2.v3f32( + <3 x float> %x, + <3 x float> %y, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #1 + ret <3 x float> %atan2 +} + +define <3 x double> @constrained_vector_atan2_v3f64(<3 x double> %x, <3 x double> %y) #0 { +; PC64LE-LABEL: constrained_vector_atan2_v3f64: +; PC64LE: # %bb.0: # %entry +; PC64LE-NEXT: mflr 0 +; PC64LE-NEXT: stdu 1, -96(1) +; PC64LE-NEXT: std 0, 112(1) +; PC64LE-NEXT: stfd 28, 64(1) # 8-byte Folded Spill +; PC64LE-NEXT: fmr 28, 2 +; PC64LE-NEXT: fmr 2, 4 +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: 
stfd 29, 72(1) # 8-byte Folded Spill +; PC64LE-NEXT: stfd 30, 80(1) # 8-byte Folded Spill +; PC64LE-NEXT: fmr 30, 5 +; PC64LE-NEXT: stfd 31, 88(1) # 8-byte Folded Spill +; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill +; PC64LE-NEXT: fmr 31, 6 +; PC64LE-NEXT: fmr 29, 3 +; PC64LE-NEXT: bl atan2 +; PC64LE-NEXT: nop +; PC64LE-NEXT: xxlor 63, 1, 1 +; PC64LE-NEXT: fmr 1, 28 +; PC64LE-NEXT: fmr 2, 30 +; PC64LE-NEXT: bl atan2 +; PC64LE-NEXT: nop +; PC64LE-NEXT: xxmrghd 63, 1, 63 +; PC64LE-NEXT: fmr 1, 29 +; PC64LE-NEXT: fmr 2, 31 +; PC64LE-NEXT: bl atan2 +; PC64LE-NEXT: nop +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: fmr 3, 1 +; PC64LE-NEXT: xxswapd 1, 63 +; PC64LE-NEXT: lfd 31, 88(1) # 8-byte Folded Reload +; PC64LE-NEXT: xxlor 2, 63, 63 +; PC64LE-NEXT: lfd 30, 80(1) # 8-byte Folded Reload +; PC64LE-NEXT: lfd 29, 72(1) # 8-byte Folded Reload +; PC64LE-NEXT: lfd 28, 64(1) # 8-byte Folded Reload +; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: addi 1, 1, 96 +; PC64LE-NEXT: ld 0, 16(1) +; PC64LE-NEXT: mtlr 0 +; PC64LE-NEXT: blr +; +; PC64LE9-LABEL: constrained_vector_atan2_v3f64: +; PC64LE9: # %bb.0: # %entry +; PC64LE9-NEXT: mflr 0 +; PC64LE9-NEXT: stdu 1, -80(1) +; PC64LE9-NEXT: std 0, 96(1) +; PC64LE9-NEXT: stfd 28, 48(1) # 8-byte Folded Spill +; PC64LE9-NEXT: stxv 63, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: fmr 28, 2 +; PC64LE9-NEXT: fmr 2, 4 +; PC64LE9-NEXT: stfd 29, 56(1) # 8-byte Folded Spill +; PC64LE9-NEXT: stfd 30, 64(1) # 8-byte Folded Spill +; PC64LE9-NEXT: stfd 31, 72(1) # 8-byte Folded Spill +; PC64LE9-NEXT: fmr 31, 6 +; PC64LE9-NEXT: fmr 30, 5 +; PC64LE9-NEXT: fmr 29, 3 +; PC64LE9-NEXT: bl atan2 +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: xscpsgndp 63, 1, 1 +; PC64LE9-NEXT: fmr 1, 28 +; PC64LE9-NEXT: fmr 2, 30 +; PC64LE9-NEXT: bl atan2 +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: xxmrghd 63, 1, 63 +; PC64LE9-NEXT: fmr 1, 29 +; PC64LE9-NEXT: fmr 2, 31 +; PC64LE9-NEXT: bl atan2 +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: fmr 3, 1 +; PC64LE9-NEXT: 
xxswapd 1, 63 +; PC64LE9-NEXT: xscpsgndp 2, 63, 63 +; PC64LE9-NEXT: lxv 63, 32(1) # 16-byte Folded Reload +; PC64LE9-NEXT: lfd 31, 72(1) # 8-byte Folded Reload +; PC64LE9-NEXT: lfd 30, 64(1) # 8-byte Folded Reload +; PC64LE9-NEXT: lfd 29, 56(1) # 8-byte Folded Reload +; PC64LE9-NEXT: lfd 28, 48(1) # 8-byte Folded Reload +; PC64LE9-NEXT: addi 1, 1, 80 +; PC64LE9-NEXT: ld 0, 16(1) +; PC64LE9-NEXT: mtlr 0 +; PC64LE9-NEXT: blr +entry: + %atan2 = call <3 x double> @llvm.experimental.constrained.atan2.v3f64( + <3 x double> %x, + <3 x double> %y, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #1 + ret <3 x double> %atan2 +} + +define <4 x double> @constrained_vector_atan2_v4f64(<4 x double> %x, <4 x double> %y) #0 { +; PC64LE-LABEL: constrained_vector_atan2_v4f64: +; PC64LE: # %bb.0: # %entry +; PC64LE-NEXT: mflr 0 +; PC64LE-NEXT: stdu 1, -128(1) +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: std 0, 144(1) +; PC64LE-NEXT: stxvd2x 59, 1, 3 # 16-byte Folded Spill +; PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: stxvd2x 60, 1, 3 # 16-byte Folded Spill +; PC64LE-NEXT: li 3, 80 +; PC64LE-NEXT: vmr 28, 2 +; PC64LE-NEXT: stxvd2x 61, 1, 3 # 16-byte Folded Spill +; PC64LE-NEXT: li 3, 96 +; PC64LE-NEXT: xxlor 1, 60, 60 +; PC64LE-NEXT: vmr 29, 3 +; PC64LE-NEXT: stxvd2x 62, 1, 3 # 16-byte Folded Spill +; PC64LE-NEXT: vmr 30, 4 +; PC64LE-NEXT: li 3, 112 +; PC64LE-NEXT: xxlor 2, 62, 62 +; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill +; PC64LE-NEXT: vmr 31, 5 +; PC64LE-NEXT: bl atan2 +; PC64LE-NEXT: nop +; PC64LE-NEXT: xxlor 59, 1, 1 +; PC64LE-NEXT: xxswapd 1, 60 +; PC64LE-NEXT: xxswapd 2, 62 +; PC64LE-NEXT: bl atan2 +; PC64LE-NEXT: nop +; PC64LE-NEXT: xxmrghd 62, 59, 1 +; PC64LE-NEXT: xxlor 1, 61, 61 +; PC64LE-NEXT: xxlor 2, 63, 63 +; PC64LE-NEXT: bl atan2 +; PC64LE-NEXT: nop +; PC64LE-NEXT: xxlor 60, 1, 1 +; PC64LE-NEXT: xxswapd 1, 61 +; PC64LE-NEXT: xxswapd 2, 63 +; PC64LE-NEXT: bl atan2 +; PC64LE-NEXT: nop +; PC64LE-NEXT: li 3, 112 +; PC64LE-NEXT: vmr 2, 30 +; PC64LE-NEXT: 
xxmrghd 35, 60, 1 +; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: li 3, 96 +; PC64LE-NEXT: lxvd2x 62, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: li 3, 80 +; PC64LE-NEXT: lxvd2x 61, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: lxvd2x 60, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: lxvd2x 59, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: addi 1, 1, 128 +; PC64LE-NEXT: ld 0, 16(1) +; PC64LE-NEXT: mtlr 0 +; PC64LE-NEXT: blr +; +; PC64LE9-LABEL: constrained_vector_atan2_v4f64: +; PC64LE9: # %bb.0: # %entry +; PC64LE9-NEXT: mflr 0 +; PC64LE9-NEXT: stdu 1, -112(1) +; PC64LE9-NEXT: std 0, 128(1) +; PC64LE9-NEXT: stxv 60, 48(1) # 16-byte Folded Spill +; PC64LE9-NEXT: stxv 62, 80(1) # 16-byte Folded Spill +; PC64LE9-NEXT: vmr 30, 4 +; PC64LE9-NEXT: xscpsgndp 2, 62, 62 +; PC64LE9-NEXT: vmr 28, 2 +; PC64LE9-NEXT: xscpsgndp 1, 60, 60 +; PC64LE9-NEXT: stxv 59, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: stxv 61, 64(1) # 16-byte Folded Spill +; PC64LE9-NEXT: stxv 63, 96(1) # 16-byte Folded Spill +; PC64LE9-NEXT: vmr 31, 5 +; PC64LE9-NEXT: vmr 29, 3 +; PC64LE9-NEXT: bl atan2 +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: xscpsgndp 59, 1, 1 +; PC64LE9-NEXT: xxswapd 1, 60 +; PC64LE9-NEXT: xxswapd 2, 62 +; PC64LE9-NEXT: bl atan2 +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: xxmrghd 62, 59, 1 +; PC64LE9-NEXT: xscpsgndp 1, 61, 61 +; PC64LE9-NEXT: xscpsgndp 2, 63, 63 +; PC64LE9-NEXT: bl atan2 +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: xscpsgndp 60, 1, 1 +; PC64LE9-NEXT: xxswapd 1, 61 +; PC64LE9-NEXT: xxswapd 2, 63 +; PC64LE9-NEXT: bl atan2 +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: xxmrghd 35, 60, 1 +; PC64LE9-NEXT: vmr 2, 30 +; PC64LE9-NEXT: lxv 63, 96(1) # 16-byte Folded Reload +; PC64LE9-NEXT: lxv 62, 80(1) # 16-byte Folded Reload +; PC64LE9-NEXT: lxv 61, 64(1) # 16-byte Folded Reload +; PC64LE9-NEXT: lxv 60, 48(1) # 16-byte Folded Reload +; PC64LE9-NEXT: lxv 59, 32(1) # 16-byte Folded Reload +; PC64LE9-NEXT: addi 1, 1, 112 +; 
PC64LE9-NEXT: ld 0, 16(1) +; PC64LE9-NEXT: mtlr 0 +; PC64LE9-NEXT: blr +entry: + %atan2 = call <4 x double> @llvm.experimental.constrained.atan2.v4f64( + <4 x double> %x, + <4 x double> %y, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #1 + ret <4 x double> %atan2 +} + attributes #0 = { nounwind strictfp noimplicitfloat } attributes #1 = { strictfp } @@ -8348,6 +8762,7 @@ declare <2 x double> @llvm.experimental.constrained.powi.v2f64(<2 x double>, i32 declare <2 x double> @llvm.experimental.constrained.sin.v2f64(<2 x double>, metadata, metadata) declare <2 x double> @llvm.experimental.constrained.cos.v2f64(<2 x double>, metadata, metadata) declare <2 x double> @llvm.experimental.constrained.tan.v2f64(<2 x double>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.atan2.v2f64(<2 x double>, <2 x double>, metadata, metadata) declare <2 x double> @llvm.experimental.constrained.exp.v2f64(<2 x double>, metadata, metadata) declare <2 x double> @llvm.experimental.constrained.exp2.v2f64(<2 x double>, metadata, metadata) declare <2 x double> @llvm.experimental.constrained.log.v2f64(<2 x double>, metadata, metadata) @@ -8394,6 +8809,7 @@ declare <1 x float> @llvm.experimental.constrained.powi.v1f32(<1 x float>, i32, declare <1 x float> @llvm.experimental.constrained.sin.v1f32(<1 x float>, metadata, metadata) declare <1 x float> @llvm.experimental.constrained.cos.v1f32(<1 x float>, metadata, metadata) declare <1 x float> @llvm.experimental.constrained.tan.v1f32(<1 x float>, metadata, metadata) +declare <1 x float> @llvm.experimental.constrained.atan2.v1f32(<1 x float>, <1 x float>, metadata, metadata) declare <1 x float> @llvm.experimental.constrained.exp.v1f32(<1 x float>, metadata, metadata) declare <1 x float> @llvm.experimental.constrained.exp2.v1f32(<1 x float>, metadata, metadata) declare <1 x float> @llvm.experimental.constrained.log.v1f32(<1 x float>, metadata, metadata) @@ -8449,6 +8865,8 @@ declare <3 x float> 
@llvm.experimental.constrained.cos.v3f32(<3 x float>, metada declare <3 x double> @llvm.experimental.constrained.cos.v3f64(<3 x double>, metadata, metadata) declare <3 x float> @llvm.experimental.constrained.tan.v3f32(<3 x float>, metadata, metadata) declare <3 x double> @llvm.experimental.constrained.tan.v3f64(<3 x double>, metadata, metadata) +declare <3 x float> @llvm.experimental.constrained.atan2.v3f32(<3 x float>, <3 x float>, metadata, metadata) +declare <3 x double> @llvm.experimental.constrained.atan2.v3f64(<3 x double>, <3 x double>, metadata, metadata) declare <3 x float> @llvm.experimental.constrained.exp.v3f32(<3 x float>, metadata, metadata) declare <3 x double> @llvm.experimental.constrained.exp.v3f64(<3 x double>, metadata, metadata) declare <3 x float> @llvm.experimental.constrained.exp2.v3f32(<3 x float>, metadata, metadata) @@ -8506,6 +8924,7 @@ declare <4 x double> @llvm.experimental.constrained.powi.v4f64(<4 x double>, i32 declare <4 x double> @llvm.experimental.constrained.sin.v4f64(<4 x double>, metadata, metadata) declare <4 x double> @llvm.experimental.constrained.cos.v4f64(<4 x double>, metadata, metadata) declare <4 x double> @llvm.experimental.constrained.tan.v4f64(<4 x double>, metadata, metadata) +declare <4 x double> @llvm.experimental.constrained.atan2.v4f64(<4 x double>, <4 x double>, metadata, metadata) declare <4 x double> @llvm.experimental.constrained.exp.v4f64(<4 x double>, metadata, metadata) declare <4 x double> @llvm.experimental.constrained.exp2.v4f64(<4 x double>, metadata, metadata) declare <4 x double> @llvm.experimental.constrained.log.v4f64(<4 x double>, metadata, metadata) diff --git a/llvm/test/CodeGen/RISCV/float-intrinsics-strict.ll b/llvm/test/CodeGen/RISCV/float-intrinsics-strict.ll index 7b2d38fefaacb1..f04da712dce311 100644 --- a/llvm/test/CodeGen/RISCV/float-intrinsics-strict.ll +++ b/llvm/test/CodeGen/RISCV/float-intrinsics-strict.ll @@ -414,6 +414,66 @@ define float @tan_f32(float %a) nounwind strictfp { ret 
float %1 } +declare float @llvm.experimental.constrained.atan2.f32(float, float, metadata, metadata) + +define float @atan2_f32(float %a, float %b) nounwind strictfp { +; RV32IF-LABEL: atan2_f32: +; RV32IF: # %bb.0: +; RV32IF-NEXT: addi sp, sp, -16 +; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IF-NEXT: call atan2f +; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: atan2_f32: +; RV64IF: # %bb.0: +; RV64IF-NEXT: addi sp, sp, -16 +; RV64IF-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IF-NEXT: call atan2f +; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IF-NEXT: addi sp, sp, 16 +; RV64IF-NEXT: ret +; +; RV32IZFINX-LABEL: atan2_f32: +; RV32IZFINX: # %bb.0: +; RV32IZFINX-NEXT: addi sp, sp, -16 +; RV32IZFINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFINX-NEXT: call atan2f +; RV32IZFINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFINX-NEXT: addi sp, sp, 16 +; RV32IZFINX-NEXT: ret +; +; RV64IZFINX-LABEL: atan2_f32: +; RV64IZFINX: # %bb.0: +; RV64IZFINX-NEXT: addi sp, sp, -16 +; RV64IZFINX-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFINX-NEXT: call atan2f +; RV64IZFINX-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IZFINX-NEXT: addi sp, sp, 16 +; RV64IZFINX-NEXT: ret +; +; RV32I-LABEL: atan2_f32: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call atan2f +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: atan2_f32: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call atan2f +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call float @llvm.experimental.constrained.atan2.f32(float %a, float %b, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret float %1 +} + declare float 
@llvm.experimental.constrained.pow.f32(float, float, metadata, metadata) define float @pow_f32(float %a, float %b) nounwind strictfp { >From 0e4879e4381a30731c9fe6875e5e7ed319a244b1 Mon Sep 17 00:00:00 2001 From: Tex Riddell <t...@microsoft.com> Date: Mon, 11 Nov 2024 18:20:14 -0800 Subject: [PATCH 6/6] LangRef.rst: Add missing llvm.experimental.constrained.atan2 and revise llvm.atan2 definition. --- llvm/docs/LangRef.rst | 49 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 43 insertions(+), 6 deletions(-) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index a956e6869bf90f..06d14bc0b8d370 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -15753,16 +15753,17 @@ all types however. :: - declare float @llvm.atan2.f32(float %X, float %Y) - declare double @llvm.atan2.f64(double %X, double %Y) - declare x86_fp80 @llvm.atan2.f80(x86_fp80 %X, x86_fp80 %Y) - declare fp128 @llvm.atan2.f128(fp128 %X, fp128 %Y) - declare ppc_fp128 @llvm.atan2.ppcf128(ppc_fp128 %X, ppc_fp128 %Y) + declare float @llvm.atan2.f32(float %Y, float %X) + declare double @llvm.atan2.f64(double %Y, double %X) + declare x86_fp80 @llvm.atan2.f80(x86_fp80 %Y, x86_fp80 %X) + declare fp128 @llvm.atan2.f128(fp128 %Y, fp128 %X) + declare ppc_fp128 @llvm.atan2.ppcf128(ppc_fp128 %Y, ppc_fp128 %X) Overview: """"""""" -The '``llvm.atan2.*``' intrinsics return the arctangent of the operand. +The '``llvm.atan2.*``' intrinsics return the arctangent of ``Y/X`` accounting +for the quadrant. Arguments: """""""""" @@ -27264,6 +27265,42 @@ This function returns the arctangent of the specified operand, returning the same values as the libm ``atan`` functions would, and handles error conditions in the same way. 
+'``llvm.experimental.constrained.atan2``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare <type> + @llvm.experimental.constrained.atan2(<type> <op1>, + <type> <op2>, + metadata <rounding mode>, + metadata <exception behavior>) + +Overview: +""""""""" + +The '``llvm.experimental.constrained.atan2``' intrinsic returns the arctangent +of ``<op1>/<op2>``, using the signs of both operands to determine the quadrant. + +Arguments: +"""""""""" + +The first two arguments and the return value are floating-point numbers of the +same type. + +The third and fourth arguments specify the rounding mode and exception +behavior as described above. + +Semantics: +"""""""""" + +This function returns the quadrant-specific arctangent using the specified +operands, returning the same values as the libm ``atan2`` functions would, and +handles error conditions in the same way. + '``llvm.experimental.constrained.sinh``' Intrinsic ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits