[PATCH] D114099: Enable `_Float16` type support on X86 without the avx512fp16 flag

Zahira Ammarguellat via Phabricator via cfe-commits Wed, 17 Nov 2021 08:59:31 -0800

zahiraam created this revision.
zahiraam added reviewers: rjmccall, pengfei, andrew.w.kaylor.
zahiraam requested review of this revision.
Herald added a project: clang.


The _Float16 type is supported on x86 systems with SSE2 enabled. Operations are
emulated in software using “float” instructions. This patch enables support for
the _Float16 type without requiring the -mavx512fp16 flag. The final goal is to
perform _Float16 emulation for all arithmetic expressions.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D114099

Files:
  clang/docs/LanguageExtensions.rst
  clang/docs/ReleaseNotes.rst
  clang/lib/Basic/Targets/X86.cpp
  clang/test/CodeGen/X86/Float16-arithmetic.c
  clang/test/CodeGen/X86/avx512fp16-abi.c
  clang/test/CodeGen/X86/avx512fp16-complex.c
  clang/test/CodeGen/X86/fp16-abi.c
  clang/test/Sema/Float16.c
  clang/test/Sema/conversion-target-dep.c
  clang/test/SemaCXX/Float16.cpp

Index: clang/test/SemaCXX/Float16.cpp
===================================================================
--- clang/test/SemaCXX/Float16.cpp
+++ clang/test/SemaCXX/Float16.cpp
@@ -1,18 +1,9 @@
 // RUN: %clang_cc1 -fsyntax-only -verify -triple x86_64-linux-pc %s
-// RUN: %clang_cc1 -fsyntax-only -verify -triple spir-unknown-unknown %s -DHAVE
-// RUN: %clang_cc1 -fsyntax-only -verify -triple armv7a-linux-gnu %s -DHAVE
-// RUN: %clang_cc1 -fsyntax-only -verify -triple aarch64-linux-gnu %s -DHAVE
+// RUN: %clang_cc1 -fsyntax-only -verify -triple spir-unknown-unknown %s
+// RUN: %clang_cc1 -fsyntax-only -verify -triple armv7a-linux-gnu %s
+// RUN: %clang_cc1 -fsyntax-only -verify -triple aarch64-linux-gnu %s
 
-#ifdef HAVE
 // expected-no-diagnostics
-#endif // HAVE
 
-#ifndef HAVE
-// expected-error@+2{{_Float16 is not supported on this target}}
-#endif // !HAVE
 _Float16 f;
-
-#ifndef HAVE
-// expected-error@+2{{invalid suffix 'F16' on floating constant}}
-#endif // !HAVE
 const auto g = 1.1F16;
Index: clang/test/Sema/conversion-target-dep.c
===================================================================
--- clang/test/Sema/conversion-target-dep.c
+++ clang/test/Sema/conversion-target-dep.c
@@ -6,7 +6,7 @@
 
 long double ld;
 double d;
-_Float16 f16; // x86-error {{_Float16 is not supported on this target}}
+_Float16 f16;
 
 int main() {
   ld = d; // x86-warning {{implicit conversion increases floating-point precision: 'double' to 'long double'}}
Index: clang/test/Sema/Float16.c
===================================================================
--- clang/test/Sema/Float16.c
+++ clang/test/Sema/Float16.c
@@ -1,18 +1,13 @@
 // RUN: %clang_cc1 -fsyntax-only -verify -triple x86_64-linux-pc %s
-// RUN: %clang_cc1 -fsyntax-only -verify -triple x86_64-linux-pc -target-feature +avx512fp16 %s -DHAVE
-// RUN: %clang_cc1 -fsyntax-only -verify -triple spir-unknown-unknown %s -DHAVE
-// RUN: %clang_cc1 -fsyntax-only -verify -triple armv7a-linux-gnu %s -DHAVE
-// RUN: %clang_cc1 -fsyntax-only -verify -triple aarch64-linux-gnu %s -DHAVE
+// RUN: %clang_cc1 -fsyntax-only -verify -triple x86_64-linux-pc -target-feature +avx512fp16 %s
+// RUN: %clang_cc1 -fsyntax-only -verify -triple spir-unknown-unknown %s
+// RUN: %clang_cc1 -fsyntax-only -verify -triple armv7a-linux-gnu %s
+// RUN: %clang_cc1 -fsyntax-only -verify -triple aarch64-linux-gnu %s
 
-#ifndef HAVE
-// expected-error@+2{{_Float16 is not supported on this target}}
-#endif // HAVE
 _Float16 f;
 
-#ifdef HAVE
 _Complex _Float16 a;
 void builtin_complex() {
   _Float16 a = 0;
   (void)__builtin_complex(a, a); // expected-error {{'_Complex _Float16' is invalid}}
 }
-#endif
Index: clang/test/CodeGen/X86/fp16-abi.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/X86/fp16-abi.c
@@ -0,0 +1,113 @@
+// RUN: %clang_cc1 -triple x86_64-linux -emit-llvm < %s | FileCheck %s
+
+struct half1 {
+  _Float16 a;
+};
+
+struct half1 h1(_Float16 a) {
+  // CHECK: define{{.*}}half @h1
+  struct half1 x;
+  x.a = a;
+  return x;
+}
+
+struct half2 {
+  _Float16 a;
+  _Float16 b;
+};
+
+struct half2 h2(_Float16 a, _Float16 b) {
+  // CHECK: define{{.*}}<2 x half> @h2
+  struct half2 x;
+  x.a = a;
+  x.b = b;
+  return x;
+}
+
+struct half3 {
+  _Float16 a;
+  _Float16 b;
+  _Float16 c;
+};
+
+struct half3 h3(_Float16 a, _Float16 b, _Float16 c) {
+  // CHECK: define{{.*}}<4 x half> @h3
+  struct half3 x;
+  x.a = a;
+  x.b = b;
+  x.c = c;
+  return x;
+}
+
+struct half4 {
+  _Float16 a;
+  _Float16 b;
+  _Float16 c;
+  _Float16 d;
+};
+
+struct half4 h4(_Float16 a, _Float16 b, _Float16 c, _Float16 d) {
+  // CHECK: define{{.*}}<4 x half> @h4
+  struct half4 x;
+  x.a = a;
+  x.b = b;
+  x.c = c;
+  x.d = d;
+  return x;
+}
+
+struct floathalf {
+  float a;
+  _Float16 b;
+};
+
+struct floathalf fh(float a, _Float16 b) {
+  // CHECK: define {{.*}} <4 x half> @fh
+  struct floathalf x;
+  x.a = a;
+  x.b = b;
+  return x;
+}
+
+struct floathalf2 {
+  float a;
+  _Float16 b;
+  _Float16 c;
+};
+
+struct floathalf2 fh2(float a, _Float16 b, _Float16 c) {
+  // CHECK: define {{.*}} <4 x half> @fh2
+  struct floathalf2 x;
+  x.a = a;
+  x.b = b;
+  x.c = c;
+  return x;
+}
+
+struct halffloat {
+  _Float16 a;
+  float b;
+};
+
+struct halffloat hf(_Float16 a, float b) {
+  // CHECK: define {{.*}} <4 x half> @hf
+  struct halffloat x;
+  x.a = a;
+  x.b = b;
+  return x;
+}
+
+struct half2float {
+  _Float16 a;
+  _Float16 b;
+  float c;
+};
+
+struct half2float h2f(_Float16 a, _Float16 b, float c) {
+  // CHECK: define {{.*}} <4 x half> @h2f
+  struct half2float x;
+  x.a = a;
+  x.b = b;
+  x.c = c;
+  return x;
+}
Index: clang/test/CodeGen/X86/avx512fp16-complex.c
===================================================================
--- clang/test/CodeGen/X86/avx512fp16-complex.c
+++ clang/test/CodeGen/X86/avx512fp16-complex.c
@@ -1,4 +1,5 @@
 // RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-unknown-unknown -target-feature +avx512fp16 -o - | FileCheck %s --check-prefix=X86
+// RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-unknown-unknown -o - | FileCheck %s --check-prefix=X86
 
 _Float16 _Complex add_half_rr(_Float16 a, _Float16 b) {
   // X86-LABEL: @add_half_rr(
Index: clang/test/CodeGen/X86/avx512fp16-abi.c
===================================================================
--- clang/test/CodeGen/X86/avx512fp16-abi.c
+++ clang/test/CodeGen/X86/avx512fp16-abi.c
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -triple x86_64-linux -emit-llvm  -target-feature +avx512fp16 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-C
+// RUN: %clang_cc1 -triple x86_64-linux -emit-llvm  < %s | FileCheck %s --check-prefixes=CHECK,CHECK-C
 // RUN: %clang_cc1 -triple x86_64-linux -emit-llvm  -target-feature +avx512fp16 -x c++ -std=c++11 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-CPP
+// RUN: %clang_cc1 -triple x86_64-linux -emit-llvm -x c++ -std=c++11 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-CPP
 
 struct half1 {
   _Float16 a;
Index: clang/test/CodeGen/X86/Float16-arithmetic.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/X86/Float16-arithmetic.c
@@ -0,0 +1,73 @@
+// RUN: %clang_cc1 -triple  x86_64-unknown-unknown -emit-llvm  \
+// RUN: < %s  | FileCheck %s --check-prefixes=CHECK
+
+_Float16 add1(_Float16 a, _Float16 b) {
+  // CHECK-LABEL: define {{.*}} half @add1
+  // CHECK: alloca half
+  // CHECK: alloca half
+  // CHECK: store half {{.*}}, half*
+  // CHECK: store half {{.*}}, half*
+  // CHECK: load half, half*
+  // CHECK: load half, half* {{.*}}
+  // CHECK: fadd half {{.*}}, {{.*}}
+  // CHECK: ret half
+  return a + b;
+}
+
+_Float16 add2(_Float16 a, _Float16 b, _Float16 c) {
+  // CHECK-LABEL: define dso_local half @add2
+  // CHECK: alloca half
+  // CHECK: alloca half
+  // CHECK: alloca half
+  // CHECK: store half {{.*}}, half*
+  // CHECK: store half {{.*}}, half*
+  // CHECK: store half {{.*}}, half*
+  // CHECK: load half, half* {{.*}}
+  // CHECK: load half, half* {{.*}}
+  // CHECK: fadd half {{.*}}, {{.*}}
+  // CHECK: load half, half* {{.*}}
+  // CHECK: fadd half {{.*}}, {{.*}}
+  // CHECK: ret half
+    return a + b + c;
+}
+
+_Float16 sub(_Float16 a, _Float16 b) {
+  // CHECK-LABEL: define {{.*}} half @sub
+  // CHECK: alloca half
+  // CHECK: alloca half
+  // CHECK: store half {{.*}}, half*
+  // CHECK: store half {{.*}}, half*
+  // CHECK: load half, half*
+  // CHECK: load half, half* {{.*}}
+  // CHECK: fsub half {{.*}}, {{.*}}
+  // CHECK: ret half
+  return a - b;
+}
+
+_Float16 div(_Float16 a, _Float16 b) {
+  // CHECK-LABEL: define {{.*}} half @div
+  // CHECK: alloca half
+  // CHECK: alloca half
+  // CHECK: store half {{.*}}, half*
+  // CHECK: store half {{.*}}, half*
+  // CHECK: load half, half* {{.*}}
+  // CHECK: load half, half* {{.*}}
+  // CHECK: fdiv half {{.*}}, {{.*}}
+  // CHECK: ret half
+  return a / b;
+}
+
+_Float16 mul(_Float16 a, _Float16 b) {
+  // CHECK-LABEL: define {{.*}} half @mul
+  // CHECK: alloca half
+  // CHECK: alloca half
+  // CHECK: store half {{.*}}, half*
+  // CHECK: store half {{.*}}, half*
+  // CHECK: load half, half* {{.*}}
+  // CHECK: load half, half* {{.*}}
+  // CHECK: fmul half {{.*}}, {{.*}}
+  // CHECK: ret half
+  return a * b;
+}
+
+
Index: clang/lib/Basic/Targets/X86.cpp
===================================================================
--- clang/lib/Basic/Targets/X86.cpp
+++ clang/lib/Basic/Targets/X86.cpp
@@ -239,7 +239,6 @@
       HasAVX512ER = true;
     } else if (Feature == "+avx512fp16") {
       HasAVX512FP16 = true;
-      HasFloat16 = true;
     } else if (Feature == "+avx512pf") {
       HasAVX512PF = true;
     } else if (Feature == "+avx512dq") {
@@ -369,6 +368,8 @@
                          .Default(NoXOP);
     XOPLevel = std::max(XOPLevel, XLevel);
   }
+  // Turn on _Float16 for x86 whenever SSE2 or later is enabled.
+  HasFloat16 = SSELevel >= SSE2;
 
   // LLVM doesn't have a separate switch for fpmath, so only accept it if it
   // matches the selected sse level.
Index: clang/docs/ReleaseNotes.rst
===================================================================
--- clang/docs/ReleaseNotes.rst
+++ clang/docs/ReleaseNotes.rst
@@ -187,6 +187,7 @@
 --------------------
 
 - Support for ``AVX512-FP16`` instructions has been added.
+- Support for ``_Float16`` type has been added.
 
 Arm and AArch64 Support in Clang
 --------------------------------
Index: clang/docs/LanguageExtensions.rst
===================================================================
--- clang/docs/LanguageExtensions.rst
+++ clang/docs/LanguageExtensions.rst
@@ -673,7 +673,7 @@
 * 64-bit ARM (AArch64)
 * AMDGPU
 * SPIR
-* X86 (Only available under feature AVX512-FP16)
+* X86
 
 ``_Float16`` will be supported on more targets as they define ABIs for it.

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D114099: Enable `_Float16` type support on X86 without the avx512fp16 flag

Reply via email to