https://github.com/rjodinchr updated https://github.com/llvm/llvm-project/pull/66651
>From b6df142239256e979a70896f324f9ed3547c640c Mon Sep 17 00:00:00 2001 From: Romaric Jodin <rjo...@chromium.org> Date: Mon, 18 Sep 2023 09:34:56 +0200 Subject: [PATCH 1/2] Revert "clang/OpenCL: Add inline implementations of sqrt in builtin header" This reverts commit 15e0fe0b6122e32657b98daf74a1fce028d2e5bf. --- clang/lib/Headers/opencl-c-base.h | 58 ------- clang/lib/Headers/opencl-c.h | 26 +++ clang/lib/Sema/OpenCLBuiltins.td | 5 +- clang/test/CodeGenOpenCL/sqrt-fpmath.cl | 201 ------------------------ 4 files changed, 27 insertions(+), 263 deletions(-) delete mode 100644 clang/test/CodeGenOpenCL/sqrt-fpmath.cl diff --git a/clang/lib/Headers/opencl-c-base.h b/clang/lib/Headers/opencl-c-base.h index d56e5ceae652ad5..2494f6213fc5695 100644 --- a/clang/lib/Headers/opencl-c-base.h +++ b/clang/lib/Headers/opencl-c-base.h @@ -819,64 +819,6 @@ int printf(__constant const char* st, ...) __attribute__((format(printf, 1, 2))) #endif // cl_intel_device_side_avc_motion_estimation -/** - * Compute square root. - * - * Provide inline implementations using the builtin so that we get appropriate - * !fpmath based on -cl-fp32-correctly-rounded-divide-sqrt, attached to - * llvm.sqrt. The implementation should still provide an external definition. - */ -#define __ovld __attribute__((overloadable)) -#define __cnfn __attribute__((const)) - -inline float __ovld __cnfn sqrt(float __x) { - return __builtin_elementwise_sqrt(__x); -} - -inline float2 __ovld __cnfn sqrt(float2 __x) { - return __builtin_elementwise_sqrt(__x); -} - -inline float3 __ovld __cnfn sqrt(float3 __x) { - return __builtin_elementwise_sqrt(__x); -} - -inline float4 __ovld __cnfn sqrt(float4 __x) { - return __builtin_elementwise_sqrt(__x); -} - -inline float8 __ovld __cnfn sqrt(float8 __x) { - return __builtin_elementwise_sqrt(__x); -} - -inline float16 __ovld __cnfn sqrt(float16 __x) { - return __builtin_elementwise_sqrt(__x); -} - -// We only really want to define the float variants here. However -// -fdeclare-opencl-builtins will not work if some overloads are already - // provided in the base header, so provide all overloads here. - -#ifdef cl_khr_fp64 -double __ovld __cnfn sqrt(double); -double2 __ovld __cnfn sqrt(double2); -double3 __ovld __cnfn sqrt(double3); -double4 __ovld __cnfn sqrt(double4); -double8 __ovld __cnfn sqrt(double8); -double16 __ovld __cnfn sqrt(double16); -#endif //cl_khr_fp64 -#ifdef cl_khr_fp16 -half __ovld __cnfn sqrt(half); -half2 __ovld __cnfn sqrt(half2); -half3 __ovld __cnfn sqrt(half3); -half4 __ovld __cnfn sqrt(half4); -half8 __ovld __cnfn sqrt(half8); -half16 __ovld __cnfn sqrt(half16); -#endif //cl_khr_fp16 - -#undef __cnfn -#undef __ovld - // Disable any extensions we may have enabled previously. #pragma OPENCL EXTENSION all : disable diff --git a/clang/lib/Headers/opencl-c.h b/clang/lib/Headers/opencl-c.h index 1efbbf8f8ee6a01..288bb18bc654ebc 100644 --- a/clang/lib/Headers/opencl-c.h +++ b/clang/lib/Headers/opencl-c.h @@ -8496,6 +8496,32 @@ half8 __ovld __cnfn sinpi(half8); half16 __ovld __cnfn sinpi(half16); #endif //cl_khr_fp16 +/** + * Compute square root. + */ +float __ovld __cnfn sqrt(float); +float2 __ovld __cnfn sqrt(float2); +float3 __ovld __cnfn sqrt(float3); +float4 __ovld __cnfn sqrt(float4); +float8 __ovld __cnfn sqrt(float8); +float16 __ovld __cnfn sqrt(float16); +#ifdef cl_khr_fp64 +double __ovld __cnfn sqrt(double); +double2 __ovld __cnfn sqrt(double2); +double3 __ovld __cnfn sqrt(double3); +double4 __ovld __cnfn sqrt(double4); +double8 __ovld __cnfn sqrt(double8); +double16 __ovld __cnfn sqrt(double16); +#endif //cl_khr_fp64 +#ifdef cl_khr_fp16 +half __ovld __cnfn sqrt(half); +half2 __ovld __cnfn sqrt(half2); +half3 __ovld __cnfn sqrt(half3); +half4 __ovld __cnfn sqrt(half4); +half8 __ovld __cnfn sqrt(half8); +half16 __ovld __cnfn sqrt(half16); +#endif //cl_khr_fp16 + /** * Compute tangent. */ diff --git a/clang/lib/Sema/OpenCLBuiltins.td b/clang/lib/Sema/OpenCLBuiltins.td index 9db450281912d2f..0cceba090bd8f26 100644 --- a/clang/lib/Sema/OpenCLBuiltins.td +++ b/clang/lib/Sema/OpenCLBuiltins.td @@ -563,15 +563,12 @@ foreach name = ["acos", "acosh", "acospi", "log", "log2", "log10", "log1p", "logb", "rint", "round", "rsqrt", "sin", "sinh", "sinpi", + "sqrt", "tan", "tanh", "tanpi", "tgamma", "trunc", "lgamma"] in { def : Builtin<name, [FGenTypeN, FGenTypeN], Attr.Const>; } - -// sqrt is handled in opencl-c-base.h to handle -// -cl-fp32-correctly-rounded-divide-sqrt. - foreach name = ["nan"] in { def : Builtin<name, [GenTypeFloatVecAndScalar, GenTypeUIntVecAndScalar], Attr.Const>; def : Builtin<name, [GenTypeDoubleVecAndScalar, GenTypeULongVecAndScalar], Attr.Const>; diff --git a/clang/test/CodeGenOpenCL/sqrt-fpmath.cl b/clang/test/CodeGenOpenCL/sqrt-fpmath.cl deleted file mode 100644 index df30085cba2e7d5..000000000000000 --- a/clang/test/CodeGenOpenCL/sqrt-fpmath.cl +++ /dev/null @@ -1,201 +0,0 @@ -// Test that float variants of sqrt are emitted as available_externally inline -// definitions that call the sqrt intrinsic with appropriate !fpmath metadata -// depending on -cl-fp32-correctly-rounded-divide-sqrt - -// Test with -fdeclare-opencl-builtins -// RUN: %clang_cc1 -disable-llvm-passes -triple amdgcn-unknown-unknown -fdeclare-opencl-builtins -finclude-default-header -S -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,DEFAULT %s -// RUN: %clang_cc1 -disable-llvm-passes -triple amdgcn-unknown-unknown -fdeclare-opencl-builtins -finclude-default-header -cl-fp32-correctly-rounded-divide-sqrt -S -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,CORRECTLYROUNDED %s - -// RUN: %clang_cc1 -disable-llvm-passes -triple amdgcn-unknown-unknown -fdeclare-opencl-builtins -finclude-default-header -cl-unsafe-math-optimizations -S -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,DEFAULT-UNSAFE %s -// RUN: %clang_cc1 -disable-llvm-passes -triple amdgcn-unknown-unknown -fdeclare-opencl-builtins -finclude-default-header -cl-fp32-correctly-rounded-divide-sqrt -cl-unsafe-math-optimizations -S -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,CORRECTLYROUNDED-UNSAFE %s - -// Test without -fdeclare-opencl-builtins -// RUN: %clang_cc1 -disable-llvm-passes -triple amdgcn-unknown-unknown -finclude-default-header -S -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,DEFAULT %s -// RUN: %clang_cc1 -disable-llvm-passes -triple amdgcn-unknown-unknown -finclude-default-header -cl-fp32-correctly-rounded-divide-sqrt -S -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,CORRECTLYROUNDED %s - -// RUN: %clang_cc1 -disable-llvm-passes -triple amdgcn-unknown-unknown -finclude-default-header -cl-unsafe-math-optimizations -S -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,DEFAULT-UNSAFE %s -// RUN: %clang_cc1 -disable-llvm-passes -triple amdgcn-unknown-unknown -finclude-default-header -cl-fp32-correctly-rounded-divide-sqrt -cl-unsafe-math-optimizations -S -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,CORRECTLYROUNDED-UNSAFE %s - -#pragma OPENCL EXTENSION cl_khr_fp16 : enable - -// CHECK-LABEL: define {{.*}} float @call_sqrt_f32( -// CHECK: call {{.*}} float @_Z4sqrtf(float noundef %{{.+}}) #{{[0-9]+$}} -float call_sqrt_f32(float x) { - return sqrt(x); -} - -// CHECK-LABEL: define available_externally float @_Z4sqrtf(float noundef %__x) -// DEFAULT: call float @llvm.sqrt.f32(float %{{.+}}), !fpmath [[$FPMATH:![0-9]+]]{{$}} -// CORRECTLYROUNDED: call float @llvm.sqrt.f32(float %{{.+}}){{$}} - -// DEFAULT-UNSAFE: call reassoc nsz arcp contract afn float @llvm.sqrt.f32(float %{{.+}}), !fpmath [[$FPMATH:![0-9]+]]{{$}} -// CORRECTLYROUNDED-UNSAFE: call reassoc nsz arcp contract afn float @llvm.sqrt.f32(float %{{.+}}){{$}} - -// CHECK-LABEL: define {{.*}} <2 x float> @call_sqrt_v2f32( -// CHECK: call {{.*}} <2 x float> @_Z4sqrtDv2_f(<2 x float> noundef %{{.*}}) #{{[0-9]+$}} -float2 call_sqrt_v2f32(float2 x) { - return sqrt(x); -} - -// CHECK-LABEL: define available_externally <2 x float> @_Z4sqrtDv2_f(<2 x float> noundef %__x) -// DEFAULT: call <2 x float> @llvm.sqrt.v2f32(<2 x float> %{{.+}}), !fpmath [[$FPMATH:![0-9]+]]{{$}} -// CORRECTLYROUNDED: call <2 x float> @llvm.sqrt.v2f32(<2 x float> %{{.+}}){{$}} - -// DEFAULT-UNSAFE: call reassoc nsz arcp contract afn <2 x float> @llvm.sqrt.v2f32(<2 x float> %{{.+}}), !fpmath [[$FPMATH:![0-9]+]]{{$}} -// CORRECTLYROUNDED-UNSAFE: call reassoc nsz arcp contract afn <2 x float> @llvm.sqrt.v2f32(<2 x float> %{{.+}}){{$}} - -// CHECK-LABEL: define {{.*}} <3 x float> @call_sqrt_v3f32( -// CHECK: call {{.*}} <3 x float> @_Z4sqrtDv3_f(<3 x float> noundef %{{.*}}) #{{[0-9]+$}} -float3 call_sqrt_v3f32(float3 x) { - return sqrt(x); -} - -// CHECK-LABEL: define available_externally <3 x float> @_Z4sqrtDv3_f(<3 x float> noundef %__x) -// DEFAULT: call <3 x float> @llvm.sqrt.v3f32(<3 x float> %{{.+}}), !fpmath [[$FPMATH:![0-9]+]]{{$}} -// CORRECTLYROUNDED: call <3 x float> @llvm.sqrt.v3f32(<3 x float> %{{.+}}){{$}} - -// DEFAULT-UNSAFE: call reassoc nsz arcp contract afn <3 x float> @llvm.sqrt.v3f32(<3 x float> %{{.+}}), !fpmath [[$FPMATH:![0-9]+]]{{$}} -// CORRECTLYROUNDED-UNSAFE: call reassoc nsz arcp contract afn <3 x float> @llvm.sqrt.v3f32(<3 x float> %{{.+}}){{$}} - - -// CHECK-LABEL: define {{.*}} <4 x float> @call_sqrt_v4f32( -// CHECK: call {{.*}} <4 x float> @_Z4sqrtDv4_f(<4 x float> noundef %{{.*}}) #{{[0-9]+$}} -float4 call_sqrt_v4f32(float4 x) { - return sqrt(x); -} - -// CHECK-LABEL: define available_externally <4 x float> @_Z4sqrtDv4_f(<4 x float> noundef %__x) -// DEFAULT: call <4 x float> @llvm.sqrt.v4f32(<4 x float> %{{.+}}), !fpmath [[$FPMATH:![0-9]+]]{{$}} -// CORRECTLYROUNDED: call <4 x float> @llvm.sqrt.v4f32(<4 x float> %{{.+}}){{$}} - -// DEFAULT-UNSAFE: call reassoc nsz arcp contract afn <4 x float> @llvm.sqrt.v4f32(<4 x float> %{{.+}}), !fpmath [[$FPMATH:![0-9]+]]{{$}} -// CORRECTLYROUNDED-UNSAFE: call reassoc nsz arcp contract afn <4 x float> @llvm.sqrt.v4f32(<4 x float> %{{.+}}){{$}} - -// CHECK-LABEL: define {{.*}} <8 x float> @call_sqrt_v8f32( -// CHECK: call {{.*}} <8 x float> @_Z4sqrtDv8_f(<8 x float> noundef %{{.*}}) #{{[0-9]+$}} -float8 call_sqrt_v8f32(float8 x) { - return sqrt(x); -} - -// CHECK-LABEL: define available_externally <8 x float> @_Z4sqrtDv8_f(<8 x float> noundef %__x) -// DEFAULT: call <8 x float> @llvm.sqrt.v8f32(<8 x float> %{{.+}}), !fpmath [[$FPMATH:![0-9]+]]{{$}} -// CORRECTLYROUNDED: call <8 x float> @llvm.sqrt.v8f32(<8 x float> %{{.+}}){{$}} - -// DEFAULT-UNSAFE: call reassoc nsz arcp contract afn <8 x float> @llvm.sqrt.v8f32(<8 x float> %{{.+}}), !fpmath [[$FPMATH:![0-9]+]]{{$}} -// CORRECTLYROUNDED-UNSAFE: call reassoc nsz arcp contract afn <8 x float> @llvm.sqrt.v8f32(<8 x float> %{{.+}}){{$}} - - -// CHECK-LABEL: define {{.*}} <16 x float> @call_sqrt_v16f32( -// CHECK: call {{.*}} <16 x float> @_Z4sqrtDv16_f(<16 x float> noundef %{{.*}}) #{{[0-9]+$}} -float16 call_sqrt_v16f32(float16 x) { - return sqrt(x); -} - -// CHECK-LABEL: define available_externally <16 x float> @_Z4sqrtDv16_f(<16 x float> noundef %__x) -// DEFAULT: call <16 x float> @llvm.sqrt.v16f32(<16 x float> %{{.+}}), !fpmath [[$FPMATH:![0-9]+]]{{$}} -// CORRECTLYROUNDED: call <16 x float> @llvm.sqrt.v16f32(<16 x float> %{{.+}}){{$}} - -// DEFAULT-UNSAFE: call reassoc nsz arcp contract afn <16 x float> @llvm.sqrt.v16f32(<16 x float> %{{.+}}), !fpmath [[$FPMATH:![0-9]+]]{{$}} -// CORRECTLYROUNDED-UNSAFE: call reassoc nsz arcp contract afn <16 x float> @llvm.sqrt.v16f32(<16 x float> %{{.+}}){{$}} - - -// Not for f64 -// CHECK-LABEL: define {{.*}} double @call_sqrt_f64( -// CHECK: call {{.*}} double @_Z4sqrtd(double noundef %{{.+}}) #{{[0-9]+$}} -double call_sqrt_f64(double x) { - return sqrt(x); -} - -// CHECK-NOT: define - -// Not for f64 -// CHECK-LABEL: define {{.*}} <2 x double> @call_sqrt_v2f64( -// CHECK: call {{.*}} <2 x double> @_Z4sqrtDv2_d(<2 x double> noundef %{{.+}}) #{{[0-9]+$}} -double2 call_sqrt_v2f64(double2 x) { - return sqrt(x); -} - -// CHECK-NOT: define - -// CHECK-LABEL: define {{.*}} <3 x double> @call_sqrt_v3f64( -// CHECK: call {{.*}} <3 x double> @_Z4sqrtDv3_d(<3 x double> noundef %{{.+}}) #{{[0-9]+$}} -double3 call_sqrt_v3f64(double3 x) { - return sqrt(x); -} - -// CHECK-NOT: define - -// CHECK-LABEL: define {{.*}} <4 x double> @call_sqrt_v4f64( -// CHECK: call {{.*}} <4 x double> @_Z4sqrtDv4_d(<4 x double> noundef %{{.+}}) #{{[0-9]+$}} -double4 call_sqrt_v4f64(double4 x) { - return sqrt(x); -} - -// CHECK-NOT: define - -// CHECK-LABEL: define {{.*}} <8 x double> @call_sqrt_v8f64( -// CHECK: call {{.*}} <8 x double> @_Z4sqrtDv8_d(<8 x double> noundef %{{.+}}) #{{[0-9]+$}} -double8 call_sqrt_v8f64(double8 x) { - return sqrt(x); -} - -// CHECK-NOT: define - -// CHECK-LABEL: define {{.*}} <16 x double> @call_sqrt_v16f64( -// CHECK: call {{.*}} <16 x double> @_Z4sqrtDv16_d(<16 x double> noundef %{{.+}}) #{{[0-9]+$}} -double16 call_sqrt_v16f64(double16 x) { - return sqrt(x); -} - -// CHECK-NOT: define - -// Not for f16 -// CHECK-LABEL: define {{.*}} half @call_sqrt_f16( -// CHECK: call {{.*}} half @_Z4sqrtDh(half noundef %{{.+}}) #{{[0-9]+$}} -half call_sqrt_f16(half x) { - return sqrt(x); -} - -// CHECK-NOT: define - -// CHECK-LABEL: define {{.*}} <2 x half> @call_sqrt_v2f16( -// CHECK: call {{.*}} <2 x half> @_Z4sqrtDv2_Dh(<2 x half> noundef %{{.+}}) #{{[0-9]+$}} -half2 call_sqrt_v2f16(half2 x) { - return sqrt(x); -} - -// CHECK-NOT: define - -// CHECK-LABEL: define {{.*}} <3 x half> @call_sqrt_v3f16( -// CHECK: call {{.*}} <3 x half> @_Z4sqrtDv3_Dh(<3 x half> noundef %{{.+}}) #{{[0-9]+$}} -half3 call_sqrt_v3f16(half3 x) { - return sqrt(x); -} - -// CHECK-NOT: define - -// CHECK-LABEL: define {{.*}} <4 x half> @call_sqrt_v4f16( -// CHECK: call {{.*}} <4 x half> @_Z4sqrtDv4_Dh(<4 x half> noundef %{{.+}}) #{{[0-9]+$}} -half4 call_sqrt_v4f16(half4 x) { - return sqrt(x); -} - -// CHECK-NOT: define - -// CHECK-LABEL: define {{.*}} <8 x half> @call_sqrt_v8f16( -// CHECK: call {{.*}} <8 x half> @_Z4sqrtDv8_Dh(<8 x half> noundef %{{.+}}) #{{[0-9]+$}} -half8 call_sqrt_v8f16(half8 x) { - return sqrt(x); -} - -// CHECK-NOT: define - -// CHECK-LABEL: define {{.*}} <16 x half> @call_sqrt_v16f16( -// CHECK: call {{.*}} <16 x half> @_Z4sqrtDv16_Dh(<16 x half> noundef %{{.+}}) #{{[0-9]+$}} -half16 call_sqrt_v16f16(half16 x) { - return sqrt(x); -} - -// CHECK-NOT: define - -// DEFAULT: [[$FPMATH]] = !{float 3.000000e+00} >From f76b0029c3a40cd8444599d4946cc8e338657bf7 Mon Sep 17 00:00:00 2001 From: Romaric Jodin <rjo...@chromium.org> Date: Mon, 18 Sep 2023 16:15:06 +0200 Subject: [PATCH 2/2] clang/OpenCL: set sqrt fp accuracy on call to Z4sqrt --- clang/lib/CodeGen/CGCall.cpp | 4 + clang/test/CodeGenOpenCL/sqrt-fpmath.cl | 179 ++++++++++++++++++++++++ 2 files changed, 183 insertions(+) create mode 100644 clang/test/CodeGenOpenCL/sqrt-fpmath.cl diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index e15a4634b1d041b..0d95624f8389048 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -5612,6 +5612,10 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, BundleList); EmitBlock(Cont); } + if (CI->getCalledFunction() && CI->getCalledFunction()->hasName() && + CI->getCalledFunction()->getName().contains("Z4sqrt")) { + SetSqrtFPAccuracy(CI); + } if (callOrInvoke) *callOrInvoke = CI; diff --git a/clang/test/CodeGenOpenCL/sqrt-fpmath.cl b/clang/test/CodeGenOpenCL/sqrt-fpmath.cl new file mode 100644 index 000000000000000..7afde7f91bdfeb5 --- /dev/null +++ b/clang/test/CodeGenOpenCL/sqrt-fpmath.cl @@ -0,0 +1,179 @@ +// Test that float variants of sqrt are emitted as available_externally inline +// definitions that call the sqrt intrinsic with appropriate !fpmath metadata +// depending on -cl-fp32-correctly-rounded-divide-sqrt + +// Test with -fdeclare-opencl-builtins +// RUN: %clang_cc1 -disable-llvm-passes -triple amdgcn-unknown-unknown -fdeclare-opencl-builtins -finclude-default-header -S -emit-llvm -o %t.ll %s +// RUN: FileCheck -check-prefixes=CHECK,DEFAULT %s < %t.ll +// RUN: %clang_cc1 -disable-llvm-passes -triple amdgcn-unknown-unknown -fdeclare-opencl-builtins -finclude-default-header -cl-fp32-correctly-rounded-divide-sqrt -S -emit-llvm -o %t.ll %s +// RUN: FileCheck -check-prefixes=CHECK,CORRECTLYROUNDED %s < %t.ll + +// RUN: %clang_cc1 -disable-llvm-passes -triple amdgcn-unknown-unknown -fdeclare-opencl-builtins -finclude-default-header -cl-unsafe-math-optimizations -S -emit-llvm -o %t.ll %s +// RUN: FileCheck -check-prefixes=CHECK,DEFAULT-UNSAFE %s < %t.ll +// RUN: %clang_cc1 -disable-llvm-passes -triple amdgcn-unknown-unknown -fdeclare-opencl-builtins -finclude-default-header -cl-fp32-correctly-rounded-divide-sqrt -cl-unsafe-math-optimizations -S -emit-llvm -o %t.ll %s +// RUN: FileCheck -check-prefixes=CHECK,CORRECTLYROUNDED-UNSAFE %s < %t.ll + +// Test without -fdeclare-opencl-builtins +// RUN: %clang_cc1 -disable-llvm-passes -triple amdgcn-unknown-unknown -finclude-default-header -S -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,DEFAULT %s +// RUN: %clang_cc1 -disable-llvm-passes -triple amdgcn-unknown-unknown -finclude-default-header -cl-fp32-correctly-rounded-divide-sqrt -S -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,CORRECTLYROUNDED %s + +// RUN: %clang_cc1 -disable-llvm-passes -triple amdgcn-unknown-unknown -finclude-default-header -cl-unsafe-math-optimizations -S -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,DEFAULT-UNSAFE %s +// RUN: %clang_cc1 -disable-llvm-passes -triple amdgcn-unknown-unknown -finclude-default-header -cl-fp32-correctly-rounded-divide-sqrt -cl-unsafe-math-optimizations -S -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,CORRECTLYROUNDED-UNSAFE %s + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +// CHECK-LABEL: define {{.*}} float @call_sqrt_f32( +// DEFAULT: call float @_Z4sqrtf(float noundef %{{.+}}) #{{[0-9]+}}, !fpmath [[FPMATH:\![0-9]+]]{{$}} +// CORRECTLYROUNDED: call float @_Z4sqrtf(float noundef %{{.+}}) #{{[0-9]+}}{{$}} + +// DEFAULT-UNSAFE: call reassoc nsz arcp contract afn float @_Z4sqrtf(float noundef %{{.+}}) #{{[0-9]+}}, !fpmath [[FPMATH:\![0-9]+]]{{$}} +// CORRECTLYROUNDED-UNSAFE: call reassoc nsz arcp contract afn float @_Z4sqrtf(float noundef %{{.+}}) #{{[0-9]+}}{{$}} +float call_sqrt_f32(float x) { + return sqrt(x); +} + +// CHECK-LABEL: define {{.*}} <2 x float> @call_sqrt_v2f32( +// DEFAULT: call <2 x float> @_Z4sqrtDv2_f(<2 x float> noundef %{{.+}}) #{{[0-9]+}}, !fpmath [[FPMATH]]{{$}} +// CORRECTLYROUNDED: call <2 x float> @_Z4sqrtDv2_f(<2 x float> noundef %{{.+}}) #{{[0-9]+}}{{$}} + +// DEFAULT-UNSAFE: call reassoc nsz arcp contract afn <2 x float> @_Z4sqrtDv2_f(<2 x float> noundef %{{.+}}) #{{[0-9]+}}, !fpmath [[FPMATH]]{{$}} +// CORRECTLY-UNSAFE: call reassoc nsz arcp contract afn <2 x float> @_Z4sqrtDv2_f(<2 x float> noundef %{{.+}}) #{{[0-9]+}}{{$}} +float2 call_sqrt_v2f32(float2 x) { + return sqrt(x); +} + + +// CHECK-LABEL: define {{.*}} <3 x float> @call_sqrt_v3f32( +// DEFAULT: call <3 x float> @_Z4sqrtDv3_f(<3 x float> noundef %{{.+}}) #{{[0-9]+}}, !fpmath [[FPMATH]]{{$}} +// CORRECTLYROUNDED: call <3 x float> @_Z4sqrtDv3_f(<3 x float> noundef %{{.+}}) #{{[0-9]+}}{{$}} + +// DEFAULT-UNSAFE: call reassoc nsz arcp contract afn <3 x float> @_Z4sqrtDv3_f(<3 x float> noundef %{{.+}}) #{{[0-9]+}}, !fpmath [[FPMATH]]{{$}} +// CORRECTLY-UNSAFE: call reassoc nsz arcp contract afn <3 x float> @_Z4sqrtDv3_f(<3 x float> noundef %{{.+}}) #{{[0-9]+}}{{$}} +float3 call_sqrt_v3f32(float3 x) { + return sqrt(x); +} + + + +// CHECK-LABEL: define {{.*}} <4 x float> @call_sqrt_v4f32( +// DEFAULT: call <4 x float> @_Z4sqrtDv4_f(<4 x float> noundef %{{.+}}) #{{[0-9]+}}, !fpmath [[FPMATH]]{{$}} +// CORRECTLYROUNDED: call <4 x float> @_Z4sqrtDv4_f(<4 x float> noundef %{{.+}}) #{{[0-9]+}}{{$}} + +// DEFAULT-UNSAFE: call reassoc nsz arcp contract afn <4 x float> @_Z4sqrtDv4_f(<4 x float> noundef %{{.+}}) #{{[0-9]+}}, !fpmath [[FPMATH]]{{$}} +// CORRECTLY-UNSAFE: call reassoc nsz arcp contract afn <4 x float> @_Z4sqrtDv4_f(<4 x float> noundef %{{.+}}) #{{[0-9]+}}{{$}} +float4 call_sqrt_v4f32(float4 x) { + return sqrt(x); +} + + +// CHECK-LABEL: define {{.*}} <8 x float> @call_sqrt_v8f32( +// DEFAULT: call <8 x float> @_Z4sqrtDv8_f(<8 x float> noundef %{{.+}}) #{{[0-9]+}}, !fpmath [[FPMATH]]{{$}} +// CORRECTLYROUNDED: call <8 x float> @_Z4sqrtDv8_f(<8 x float> noundef %{{.+}}) #{{[0-9]+}}{{$}} + +// DEFAULT-UNSAFE: call reassoc nsz arcp contract afn <8 x float> @_Z4sqrtDv8_f(<8 x float> noundef %{{.+}}) #{{[0-9]+}}, !fpmath [[FPMATH]]{{$}} +// CORRECTLYROUNDED-UNSAFE: call reassoc nsz arcp contract afn <8 x float> @_Z4sqrtDv8_f(<8 x float> noundef %{{.+}}) #{{[0-9]+}}{{$}} +float8 call_sqrt_v8f32(float8 x) { + return sqrt(x); +} + + + +// CHECK-LABEL: define {{.*}} <16 x float> @call_sqrt_v16f32( +// DEFAULT: call <16 x float> @_Z4sqrtDv16_f(<16 x float> noundef %{{.+}}) #{{[0-9]+}}, !fpmath [[FPMATH]]{{$}} +// CORRECTLYROUNDED: call <16 x float> @_Z4sqrtDv16_f(<16 x float> noundef %{{.+}}) #{{[0-9]+}}{{$}} + +// DEFAULT-UNSAFE: call reassoc nsz arcp contract afn <16 x float> @_Z4sqrtDv16_f(<16 x float> noundef %{{.+}}) #{{[0-9]+}}, !fpmath [[FPMATH]]{{$}} +// CORRECTLY-UNSAFE: call reassoc nsz arcp contract afn <16 x float> @_Z4sqrtDv16_f(<16 x float> noundef %{{.+}}) #{{[0-9]+}}{{$}} +float16 call_sqrt_v16f32(float16 x) { + return sqrt(x); +} + + + +// Not for f64 +// CHECK-LABEL: define {{.*}} double @call_sqrt_f64( +// CHECK: call {{.*}} double @_Z4sqrtd(double noundef %{{.+}}) #{{[0-9]+$}}{{$}} +double call_sqrt_f64(double x) { + return sqrt(x); +} + + +// Not for f64 +// CHECK-LABEL: define {{.*}} <2 x double> @call_sqrt_v2f64( +// CHECK: call {{.*}} <2 x double> @_Z4sqrtDv2_d(<2 x double> noundef %{{.+}}) #{{[0-9]+$}}{{$}} +double2 call_sqrt_v2f64(double2 x) { + return sqrt(x); +} + + +// CHECK-LABEL: define {{.*}} <3 x double> @call_sqrt_v3f64( +// CHECK: call {{.*}} <3 x double> @_Z4sqrtDv3_d(<3 x double> noundef %{{.+}}) #{{[0-9]+$}}{{$}} +double3 call_sqrt_v3f64(double3 x) { + return sqrt(x); +} + + +// CHECK-LABEL: define {{.*}} <4 x double> @call_sqrt_v4f64( +// CHECK: call {{.*}} <4 x double> @_Z4sqrtDv4_d(<4 x double> noundef %{{.+}}) #{{[0-9]+$}}{{$}} +double4 call_sqrt_v4f64(double4 x) { + return sqrt(x); +} + + +// CHECK-LABEL: define {{.*}} <8 x double> @call_sqrt_v8f64( +// CHECK: call {{.*}} <8 x double> @_Z4sqrtDv8_d(<8 x double> noundef %{{.+}}) #{{[0-9]+$}}{{$}} +double8 call_sqrt_v8f64(double8 x) { + return sqrt(x); +} + + +// CHECK-LABEL: define {{.*}} <16 x double> @call_sqrt_v16f64( +// CHECK: call {{.*}} <16 x double> @_Z4sqrtDv16_d(<16 x double> noundef %{{.+}}) #{{[0-9]+$}}{{$}} +double16 call_sqrt_v16f64(double16 x) { + return sqrt(x); +} + + +// Not for f16 +// CHECK-LABEL: define {{.*}} half @call_sqrt_f16( +// CHECK: call {{.*}} half @_Z4sqrtDh(half noundef %{{.+}}) #{{[0-9]+$}}{{$}} +half call_sqrt_f16(half x) { + return sqrt(x); +} + + +// CHECK-LABEL: define {{.*}} <2 x half> @call_sqrt_v2f16( +// CHECK: call {{.*}} <2 x half> @_Z4sqrtDv2_Dh(<2 x half> noundef %{{.+}}) #{{[0-9]+$}}{{$}} +half2 call_sqrt_v2f16(half2 x) { + return sqrt(x); +} + + +// CHECK-LABEL: define {{.*}} <3 x half> @call_sqrt_v3f16( +// CHECK: call {{.*}} <3 x half> @_Z4sqrtDv3_Dh(<3 x half> noundef %{{.+}}) #{{[0-9]+$}}{{$}} +half3 call_sqrt_v3f16(half3 x) { + return sqrt(x); +} + + +// CHECK-LABEL: define {{.*}} <4 x half> @call_sqrt_v4f16( +// CHECK: call {{.*}} <4 x half> @_Z4sqrtDv4_Dh(<4 x half> noundef %{{.+}}) #{{[0-9]+$}}{{$}} +half4 call_sqrt_v4f16(half4 x) { + return sqrt(x); +} + + +// CHECK-LABEL: define {{.*}} <8 x half> @call_sqrt_v8f16( +// CHECK: call {{.*}} <8 x half> @_Z4sqrtDv8_Dh(<8 x half> noundef %{{.+}}) #{{[0-9]+$}}{{$}} +half8 call_sqrt_v8f16(half8 x) { + return sqrt(x); +} + + +// CHECK-LABEL: define {{.*}} <16 x half> @call_sqrt_v16f16( +// CHECK: call {{.*}} <16 x half> @_Z4sqrtDv16_Dh(<16 x half> noundef %{{.+}}) #{{[0-9]+$}}{{$}} +half16 call_sqrt_v16f16(half16 x) { + return sqrt(x); +} + +// DEFAULT: [[FPMATH]] = !{float 3.000000e+00} _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits