[PATCH] D40112: [CodeGen][X86] Fix handling of __fp16 vectors

Akira Hatanaka via Phabricator via cfe-commits Wed, 15 Nov 2017 16:47:40 -0800

ahatanak created this revision.
Herald added a subscriber: javed.absar.

IRGen for __fp16 vectors on X86 is currently completely broken. For example 
when the following code is compiled:


  half4 hv0, hv1, hv2; // these are vectors of __fp16.
  
  void foo221() {
    hv0 = hv1 + hv2;
  }

clang generates the following IR, in which two i16 values are added:

  @hv1 = common global <4 x i16> zeroinitializer, align 8
  @hv2 = common global <4 x i16> zeroinitializer, align 8
  @hv0 = common global <4 x i16> zeroinitializer, align 8
  
  define void @foo221() {
  entry:
    %0 = load <4 x i16>, <4 x i16>* @hv1, align 8
    %1 = load <4 x i16>, <4 x i16>* @hv2, align 8
    %add = add <4 x i16> %0, %1
    store <4 x i16> %add, <4 x i16>* @hv0, align 8
    ret void
  }

To fix IRGen for __fp16 vectors, this patch uses the code committed in r314056, 
which modified clang to promote and truncate __fp16 vectors to and from float 
vectors in the AST. Also, as the first step toward doing away with the __fp16 
conversion intrinsics such as @llvm.convert.to.fp16 (see 
http://lists.llvm.org/pipermail/llvm-dev/2014-July/074689.html),  I made 
changes to IRGen for __fp16 scalars so that fpext/fptrunc instructions are 
emitted instead of the __fp16 conversion intrinsics IRGen currently emits. This 
fixes another IRGen bug where a short value is assigned to an __fp16 variable 
without any integer-to-floating-point conversion, as shown in the following 
example:

C code
------

  __fp16 a;
  short b;
  
  void foo1() {
    a = b;
  }



generated IR
------------

  @b = common global i16 0, align 2
  @a = common global i16 0, align 2
  
  define void @foo1() #0 {
  entry:
    %0 = load i16, i16* @b, align 2
    store i16 %0, i16* @a, align 2
    ret void
  }

I haven't spent too much time inspecting the code the X86 backend emits, but 
the code I've seen so far seems at least functionally correct (although it 
doesn't look very efficient since the backend scalarizes __fp16 vectors).


https://reviews.llvm.org/D40112

Files:
  include/clang/Basic/TargetInfo.h
  lib/Basic/Targets/AArch64.h
  lib/Basic/Targets/ARM.h
  lib/Basic/Targets/X86.h
  lib/CodeGen/CGExprConstant.cpp
  lib/CodeGen/CGExprScalar.cpp
  lib/CodeGen/CodeGenTypes.cpp
  lib/Sema/SemaExpr.cpp
  test/CodeGen/fp16-ops.c
  test/CodeGen/fp16vec-ops.c
  test/CodeGenCXX/float16-declarations.cpp
  test/CodeGenCXX/fp16-mangle.cpp

Index: test/CodeGenCXX/fp16-mangle.cpp
===================================================================
--- test/CodeGenCXX/fp16-mangle.cpp
+++ test/CodeGenCXX/fp16-mangle.cpp
@@ -4,9 +4,9 @@
 template <typename T, typename U> struct S { static int i; };
 template <> int S<__fp16, __fp16>::i = 3;
 
-// CHECK-LABEL: define void @_Z1fPDh(i16* %x)
+// CHECK-LABEL: define void @_Z1fPDh(half* %x)
 void f (__fp16 *x) { }
 
-// CHECK-LABEL: define void @_Z1gPDhS_(i16* %x, i16* %y)
+// CHECK-LABEL: define void @_Z1gPDhS_(half* %x, half* %y)
 void g (__fp16 *x, __fp16 *y) { }
 
Index: test/CodeGenCXX/float16-declarations.cpp
===================================================================
--- test/CodeGenCXX/float16-declarations.cpp
+++ test/CodeGenCXX/float16-declarations.cpp
@@ -11,16 +11,14 @@
 // CHECK-DAG: @_ZN12_GLOBAL__N_13f1nE = internal global half 0xH0000, align 2
 
   _Float16 f2n = 33.f16;
-// CHECK-AARCH64-DAG: @_ZN12_GLOBAL__N_13f2nE = internal global half 0xH5020, align 2
-// CHECK-X86-DAG:     @_ZN12_GLOBAL__N_13f2nE = internal global i16 20512, align 2
+// CHECK-DAG: @_ZN12_GLOBAL__N_13f2nE = internal global half 0xH5020, align 2
 
   _Float16 arr1n[10];
 // CHECK-AARCH64-DAG: @_ZN12_GLOBAL__N_15arr1nE = internal global [10 x half] zeroinitializer, align 2
 // CHECK-X86-DAG:     @_ZN12_GLOBAL__N_15arr1nE = internal global [10 x half] zeroinitializer, align 16
 
   _Float16 arr2n[] = { 1.2, 3.0, 3.e4 };
-// CHECK-AARCH64-DAG: @_ZN12_GLOBAL__N_15arr2nE = internal global [3 x half] [half 0xH3CCD, half 0xH4200, half 0xH7753], align 2
-// CHECK-X86-DAG:     @_ZN12_GLOBAL__N_15arr2nE = internal global [3 x i16] [i16 15565, i16 16896, i16 30547], align 2
+// CHECK-DAG: @_ZN12_GLOBAL__N_15arr2nE = internal global [3 x half] [half 0xH3CCD, half 0xH4200, half 0xH7753], align 2
 
   const volatile _Float16 func1n(const _Float16 &arg) {
     return arg + f2n + arr1n[4] - arr2n[1];
@@ -35,16 +33,14 @@
 // CHECK-X86-DAG: @f1f = global half 0xH0000, align 2
 
 _Float16 f2f = 32.4;
-// CHECK-AARCH64-DAG: @f2f = global half 0xH500D, align 2
-// CHECK-X86-DAG: @f2f = global i16 20493, align 2
+// CHECK-DAG: @f2f = global half 0xH500D, align 2
 
 _Float16 arr1f[10];
 // CHECK-AARCH64-DAG: @arr1f = global [10 x half] zeroinitializer, align 2
 // CHECK-X86-DAG: @arr1f = global [10 x half] zeroinitializer, align 16
 
 _Float16 arr2f[] = { -1.2, -3.0, -3.e4 };
-// CHECK-AARCH64-DAG: @arr2f = global [3 x half] [half 0xHBCCD, half 0xHC200, half 0xHF753], align 2
-// CHECK-X86-DAG: @arr2f = global [3 x i16] [i16 -17203, i16 -15872, i16 -2221], align 2
+// CHECK-DAG: @arr2f = global [3 x half] [half 0xHBCCD, half 0xHC200, half 0xHF753], align 2
 
 _Float16 func1f(_Float16 arg);
 
@@ -110,11 +106,9 @@
 // CHECK-DAG:  call void @_ZN2C1C2EDF16_(%class.C1* %{{.*}}, half %{{.*}})
 
   S1<_Float16> s1 = { 132.f16 };
-// CHECK-AARCH64-DAG: @_ZZ4mainE2s1 = private unnamed_addr constant %struct.S1 { half 0xH5820 }, align 2
-// CHECK-X86-DAG:     @_ZZ4mainE2s1 = private unnamed_addr constant { i16 } { i16 22560 }, align 2
+// CHECK-DAG: @_ZZ4mainE2s1 = private unnamed_addr constant %struct.S1 { half 0xH5820 }, align 2
 // CHECK-DAG:  [[S1:%[0-9]+]] = bitcast %struct.S1* %{{.*}} to i8*
-// CHECK-AARCH64-DAG: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[S1]], i8* bitcast (%struct.S1* @_ZZ4mainE2s1 to i8*), i64 2, i32 2, i1 false)
-// CHECK-X86-DAG:     call void @llvm.memcpy.p0i8.p0i8.i64(i8* %{{.*}}, i8* bitcast ({ i16 }* @_ZZ4mainE2s1 to i8*), i64 2, i32 2, i1 false)
+// CHECK-DAG: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[S1]], i8* bitcast (%struct.S1* @_ZZ4mainE2s1 to i8*), i64 2, i32 2, i1 false)
 
   _Float16 f4l = func1n(f1l)  + func1f(f2l) + c1.func1c(f3l) + c1.func2c(f1l) +
     func1t(f1l) + s1.mem2 - f1n + f2n;
@@ -129,8 +123,7 @@
 // CHECK-DAG:  store half [[INC]], half* %{{.*}}, align 2
 
   _Float16 arr1l[] = { -1.f16, -0.f16, -11.f16 };
-// CHECK-AARCH64-DAG: @_ZZ4mainE5arr1l = private unnamed_addr constant [3 x half] [half 0xHBC00, half 0xH8000, half 0xHC980], align 2
-// CHECK-X86-DAG:     @_ZZ4mainE5arr1l = private unnamed_addr constant [3 x i16] [i16 -17408, i16 -32768, i16 -13952], align 2
+// CHECK-DAG: @_ZZ4mainE5arr1l = private unnamed_addr constant [3 x half] [half 0xHBC00, half 0xH8000, half 0xHC980], align 2
 
   float cvtf = f2n;
 //CHECK-DAG: [[H2F:%[a-z0-9]+]] = fpext half {{%[0-9]+}} to float
Index: test/CodeGen/fp16vec-ops.c
===================================================================
--- test/CodeGen/fp16vec-ops.c
+++ test/CodeGen/fp16vec-ops.c
@@ -1,6 +1,7 @@
 // REQUIRES: arm-registered-target
 // RUN: %clang_cc1 -triple arm64-apple-ios9 -emit-llvm -o - -fallow-half-arguments-and-returns %s | FileCheck %s --check-prefix=CHECK
 // RUN: %clang_cc1 -triple armv7-apple-ios9 -emit-llvm -o - -fallow-half-arguments-and-returns %s | FileCheck %s --check-prefix=CHECK
+// RUN: %clang_cc1 -triple x86_64-apple-macos10.13 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK
 
 typedef __fp16 half4 __attribute__ ((vector_size (8)));
 typedef short short4 __attribute__ ((vector_size (8)));
Index: test/CodeGen/fp16-ops.c
===================================================================
--- test/CodeGen/fp16-ops.c
+++ test/CodeGen/fp16-ops.c
@@ -1,8 +1,9 @@
 // REQUIRES: arm-registered-target
-// RUN: %clang_cc1 -emit-llvm -o - -triple arm-none-linux-gnueabi %s | FileCheck %s --check-prefix=NOHALF --check-prefix=CHECK
-// RUN: %clang_cc1 -emit-llvm -o - -triple aarch64-none-linux-gnueabi %s | FileCheck %s --check-prefix=NOHALF --check-prefix=CHECK
-// RUN: %clang_cc1 -emit-llvm -o - -triple arm-none-linux-gnueabi -fallow-half-arguments-and-returns %s | FileCheck %s --check-prefix=HALF --check-prefix=CHECK
-// RUN: %clang_cc1 -emit-llvm -o - -triple aarch64-none-linux-gnueabi -fallow-half-arguments-and-returns %s | FileCheck %s --check-prefix=HALF --check-prefix=CHECK
+// RUN: %clang_cc1 -emit-llvm -o - -triple arm-none-linux-gnueabi %s | FileCheck %s --check-prefix=NOTNATIVE --check-prefix=CHECK
+// RUN: %clang_cc1 -emit-llvm -o - -triple aarch64-none-linux-gnueabi %s | FileCheck %s --check-prefix=NOTNATIVE --check-prefix=CHECK
+// RUN: %clang_cc1 -emit-llvm -o - -triple x86_64-linux-gnu %s | FileCheck %s --check-prefix=NOTNATIVE --check-prefix=CHECK
+// RUN: %clang_cc1 -emit-llvm -o - -triple arm-none-linux-gnueabi -fallow-half-arguments-and-returns %s | FileCheck %s --check-prefix=NOTNATIVE --check-prefix=CHECK
+// RUN: %clang_cc1 -emit-llvm -o - -triple aarch64-none-linux-gnueabi -fallow-half-arguments-and-returns %s | FileCheck %s --check-prefix=NOTNATIVE --check-prefix=CHECK
 // RUN: %clang_cc1 -emit-llvm -o - -triple arm-none-linux-gnueabi -fnative-half-type %s \
 // RUN:   | FileCheck %s --check-prefix=NATIVE-HALF
 // RUN: %clang_cc1 -emit-llvm -o - -triple aarch64-none-linux-gnueabi -fnative-half-type %s \
@@ -16,30 +17,28 @@
 volatile __fp16 h0 = 0.0, h1 = 1.0, h2;
 volatile float f0, f1, f2;
 volatile double d0;
+short s0;
 
 void foo(void) {
   // CHECK-LABEL: define void @foo()
 
   // Check unary ops
 
-  // NOHALF: [[F16TOF32:call float @llvm.convert.from.fp16.f32]]
-  // HALF: [[F16TOF32:fpext half]]
+  // NOTNATIVE: [[F16TOF32:fpext half]]
   // CHECK: fptoui float
   // NATIVE-HALF: fptoui half
   test = (h0);
   // CHECK: uitofp i32
-  // NOHALF: [[F32TOF16:call i16 @llvm.convert.to.fp16.f32]]
-  // HALF: [[F32TOF16:fptrunc float]]
+  // NOTNATIVE: [[F32TOF16:fptrunc float]]
   // NATIVE-HALF: uitofp i32 {{.*}} to half
   h0 = (test);
   // CHECK: [[F16TOF32]]
   // CHECK: fcmp une float
   // NATIVE-HALF: fcmp une half
   test = (!h1);
   // CHECK: [[F16TOF32]]
   // CHECK: fsub float
-  // NOHALF: [[F32TOF16]]
-  // HALF: [[F32TOF16]]
+  // NOTNATIVE: [[F32TOF16]]
   // NATIVE-HALF: fsub half
   h1 = -h1;
   // CHECK: [[F16TOF32]]
@@ -76,8 +75,6 @@
   // NATIVE-HALF: fmul half
   h1 = h0 * h2;
   // CHECK: [[F16TOF32]]
-  // NOHALF: [[F32TOF16]]
-  // NOHALF: [[F16TOF32]]
   // CHECK: fmul float
   // CHECK: [[F32TOF16]]
   // NATIVE-HALF: fmul half
@@ -107,7 +104,6 @@
   // NATIVE-HALF: fdiv half
   h1 = (h0 / h2);
   // CHECK: [[F16TOF32]]
-  // NOHALF: [[F16TOF32]]
   // CHECK: fdiv float
   // CHECK: [[F32TOF16]]
   // NATIVE-HALF: fdiv half
@@ -137,7 +133,6 @@
   // NATIVE-HALF: fadd half
   h1 = (h2 + h0);
   // CHECK: [[F16TOF32]]
-  // NOHALF: [[F16TOF32]]
   // CHECK: fadd float
   // CHECK: [[F32TOF16]]
   // NATIVE-HALF: fadd half
@@ -167,7 +162,6 @@
   // NATIVE-HALF: fsub half
   h1 = (h2 - h0);
   // CHECK: [[F16TOF32]]
-  // NOHALF: [[F16TOF32]]
   // CHECK: fsub float
   // CHECK: [[F32TOF16]]
   // NATIVE-HALF: fsub half
@@ -196,7 +190,6 @@
   // NATIVE-HALF: fcmp olt half
   test = (h2 < h0);
   // CHECK: [[F16TOF32]]
-  // NOHALF: [[F16TOF32]]
   // CHECK: fcmp olt float
   // NATIVE-HALF: fcmp olt half
   test = (h2 < (__fp16)42.0);
@@ -225,7 +218,6 @@
   // NATIVE-HALF: fcmp ogt half
   test = (h0 > h2);
   // CHECK: [[F16TOF32]]
-  // NOHALF: [[F16TOF32]]
   // CHECK: fcmp ogt float
   // NATIVE-HALF: fcmp ogt half
   test = ((__fp16)42.0 > h2);
@@ -254,7 +246,6 @@
   // NATIVE-HALF: fcmp ole half
   test = (h2 <= h0);
   // CHECK: [[F16TOF32]]
-  // NOHALF: [[F16TOF32]]
   // CHECK: fcmp ole float
   // NATIVE-HALF: fcmp ole half
   test = (h2 <= (__fp16)42.0);
@@ -284,7 +275,6 @@
   // NATIVE-HALF: fcmp oge half
   test = (h0 >= h2);
   // CHECK: [[F16TOF32]]
-  // NOHALF: [[F16TOF32]]
   // CHECK: fcmp oge float
   // NATIVE-HALF: fcmp oge half
   test = (h0 >= (__fp16)-2.0);
@@ -313,7 +303,6 @@
   // NATIVE-HALF: fcmp oeq half
   test = (h1 == h2);
   // CHECK: [[F16TOF32]]
-  // NOHALF: [[F16TOF32]]
   // CHECK: fcmp oeq float
   // NATIVE-HALF: fcmp oeq half
   test = (h1 == (__fp16)1.0);
@@ -342,7 +331,6 @@
   // NATIVE-HALF: fcmp une half
   test = (h1 != h2);
   // CHECK: [[F16TOF32]]
-  // NOHALF: [[F16TOF32]]
   // CHECK: fcmp une float
   // NATIVE-HALF: fcmp une half
   test = (h1 != (__fp16)1.0);
@@ -374,8 +362,7 @@
   h1 = (h1 ? h2 : h0);
   // Check assignments (inc. compound)
   h0 = h1;
-  // NOHALF: [[F32TOF16]]
-  // HALF: store {{.*}} half 0xHC000
+  // NOTNATIVE: store {{.*}} half 0xHC000
   // NATIVE-HALF: store {{.*}} half 0xHC000
   h0 = (__fp16)-2.0f;
   // CHECK: [[F32TOF16]]
@@ -398,7 +385,6 @@
   // NATIVE-HALF: fadd half
   h0 += h1;
   // CHECK: [[F16TOF32]]
-  // NOHALF: [[F16TOF32]]
   // CHECK: fadd float
   // CHECK: [[F32TOF16]]
   // NATIVE-HALF: fadd half
@@ -433,7 +419,6 @@
   // NATIVE-HALF: fsub half
   h0 -= h1;
   // CHECK: [[F16TOF32]]
-  // NOHALF: [[F16TOF32]]
   // CHECK: fsub float
   // CHECK: [[F32TOF16]]
   // NATIVE-HALF: fsub half
@@ -468,7 +453,6 @@
   // NATIVE-HALF: fmul half
   h0 *= h1;
   // CHECK: [[F16TOF32]]
-  // NOHALF: [[F16TOF32]]
   // CHECK: fmul float
   // CHECK: [[F32TOF16]]
   // NATIVE-HALF: fmul half
@@ -503,7 +487,6 @@
   // NATIVE-HALF: fdiv half
   h0 /= h1;
   // CHECK: [[F16TOF32]]
-  // NOHALF: [[F16TOF32]]
   // CHECK: fdiv float
   // CHECK: [[F32TOF16]]
   // NATIVE-HALF: fdiv half
@@ -532,27 +515,29 @@
   h0 /= i0;
 
   // Check conversions to/from double
-  // NOHALF: call i16 @llvm.convert.to.fp16.f64(
-  // HALF: fptrunc double {{.*}} to half
+  // NOTNATIVE: fptrunc double {{.*}} to half
   // NATIVE-HALF: fptrunc double {{.*}} to half
   h0 = d0;
 
   // CHECK: [[MID:%.*]] = fptrunc double {{%.*}} to float
-  // NOHALF: call i16 @llvm.convert.to.fp16.f32(float [[MID]])
-  // HALF: fptrunc float [[MID]] to half
+  // NOTNATIVE: fptrunc float [[MID]] to half
   // NATIVE-HALF: [[MID:%.*]] = fptrunc double {{%.*}} to float
   // NATIVE-HALF: fptrunc float {{.*}} to half
   h0 = (float)d0;
 
-  // NOHALF: call double @llvm.convert.from.fp16.f64(
-  // HALF: fpext half {{.*}} to double
+  // NOTNATIVE: fpext half {{.*}} to double
   // NATIVE-HALF: fpext half {{.*}} to double
   d0 = h0;
 
-  // NOHALF: [[MID:%.*]] = call float @llvm.convert.from.fp16.f32(
-  // HALF: [[MID:%.*]] = fpext half {{.*}} to float
+  // NOTNATIVE: [[MID:%.*]] = fpext half {{.*}} to float
   // CHECK: fpext float [[MID]] to double
   // NATIVE-HALF: [[MID:%.*]] = fpext half {{.*}} to float
   // NATIVE-HALF: fpext float [[MID]] to double
   d0 = (float)h0;
+
+  // NOTNATIVE: [[V1:%.*]] = load i16, i16* @s0
+  // NOTNATIVE: [[CONV:%.*]] = sitofp i16 [[V1]] to float
+  // NOTNATIVE: [[TRUNC:%.*]] = fptrunc float [[CONV]] to half
+  // NOTNATIVE: store volatile half [[TRUNC]], half* @h0
+  h0 = s0;
 }
Index: lib/Sema/SemaExpr.cpp
===================================================================
--- lib/Sema/SemaExpr.cpp
+++ lib/Sema/SemaExpr.cpp
@@ -11565,7 +11565,8 @@
 static bool needsConversionOfHalfVec(bool OpRequiresConversion, ASTContext &Ctx,
                                      QualType SrcType) {
   return OpRequiresConversion && !Ctx.getLangOpts().NativeHalfType &&
-         Ctx.getLangOpts().HalfArgsAndReturns && isVector(SrcType, Ctx.HalfTy);
+         !Ctx.getTargetInfo().useFP16ConversionIntrinsics() &&
+         isVector(SrcType, Ctx.HalfTy);
 }
 
 /// CreateBuiltinBinOp - Creates a new built-in binary operation with
Index: lib/CodeGen/CodeGenTypes.cpp
===================================================================
--- lib/CodeGen/CodeGenTypes.cpp
+++ lib/CodeGen/CodeGenTypes.cpp
@@ -451,10 +451,10 @@
 
     case BuiltinType::Half:
       // Half FP can either be storage-only (lowered to i16) or native.
-      ResultType =
-          getTypeForFormat(getLLVMContext(), Context.getFloatTypeSemantics(T),
-                           Context.getLangOpts().NativeHalfType ||
-                               Context.getLangOpts().HalfArgsAndReturns);
+      ResultType = getTypeForFormat(
+          getLLVMContext(), Context.getFloatTypeSemantics(T),
+          Context.getLangOpts().NativeHalfType ||
+              !Context.getTargetInfo().useFP16ConversionIntrinsics());
       break;
     case BuiltinType::Float:
     case BuiltinType::Double:
Index: lib/CodeGen/CGExprScalar.cpp
===================================================================
--- lib/CodeGen/CGExprScalar.cpp
+++ lib/CodeGen/CGExprScalar.cpp
@@ -951,15 +951,15 @@
   if (SrcType->isHalfType() && !CGF.getContext().getLangOpts().NativeHalfType) {
     // Cast to FP using the intrinsic if the half type itself isn't supported.
     if (DstTy->isFloatingPointTy()) {
-      if (!CGF.getContext().getLangOpts().HalfArgsAndReturns)
+      if (CGF.getContext().getTargetInfo().useFP16ConversionIntrinsics())
         return Builder.CreateCall(
             CGF.CGM.getIntrinsic(llvm::Intrinsic::convert_from_fp16, DstTy),
             Src);
     } else {
       // Cast to other types through float, using either the intrinsic or FPExt,
       // depending on whether the half type itself is supported
       // (as opposed to operations on half, available with NativeHalfType).
-      if (!CGF.getContext().getLangOpts().HalfArgsAndReturns) {
+      if (CGF.getContext().getTargetInfo().useFP16ConversionIntrinsics()) {
         Src = Builder.CreateCall(
             CGF.CGM.getIntrinsic(llvm::Intrinsic::convert_from_fp16,
                                  CGF.CGM.FloatTy),
@@ -1068,7 +1068,7 @@
     if (SrcTy->isFloatingPointTy()) {
       // Use the intrinsic if the half type itself isn't supported
       // (as opposed to operations on half, available with NativeHalfType).
-      if (!CGF.getContext().getLangOpts().HalfArgsAndReturns)
+      if (CGF.getContext().getTargetInfo().useFP16ConversionIntrinsics())
         return Builder.CreateCall(
             CGF.CGM.getIntrinsic(llvm::Intrinsic::convert_to_fp16, SrcTy), Src);
       // If the half type is supported, just use an fptrunc.
@@ -1104,7 +1104,7 @@
   }
 
   if (DstTy != ResTy) {
-    if (!CGF.getContext().getLangOpts().HalfArgsAndReturns) {
+    if (CGF.getContext().getTargetInfo().useFP16ConversionIntrinsics()) {
       assert(ResTy->isIntegerTy(16) && "Only half FP requires extra conversion");
       Res = Builder.CreateCall(
         CGF.CGM.getIntrinsic(llvm::Intrinsic::convert_to_fp16, CGF.CGM.FloatTy),
@@ -2028,7 +2028,7 @@
 
     if (type->isHalfType() && !CGF.getContext().getLangOpts().NativeHalfType) {
       // Another special case: half FP increment should be done via float
-      if (!CGF.getContext().getLangOpts().HalfArgsAndReturns) {
+      if (CGF.getContext().getTargetInfo().useFP16ConversionIntrinsics()) {
         value = Builder.CreateCall(
             CGF.CGM.getIntrinsic(llvm::Intrinsic::convert_from_fp16,
                                  CGF.CGM.FloatTy),
@@ -2063,7 +2063,7 @@
     value = Builder.CreateFAdd(value, amt, isInc ? "inc" : "dec");
 
     if (type->isHalfType() && !CGF.getContext().getLangOpts().NativeHalfType) {
-      if (!CGF.getContext().getLangOpts().HalfArgsAndReturns) {
+      if (CGF.getContext().getTargetInfo().useFP16ConversionIntrinsics()) {
         value = Builder.CreateCall(
             CGF.CGM.getIntrinsic(llvm::Intrinsic::convert_to_fp16,
                                  CGF.CGM.FloatTy),
Index: lib/CodeGen/CGExprConstant.cpp
===================================================================
--- lib/CodeGen/CGExprConstant.cpp
+++ lib/CodeGen/CGExprConstant.cpp
@@ -1825,7 +1825,7 @@
     const llvm::APFloat &Init = Value.getFloat();
     if (&Init.getSemantics() == &llvm::APFloat::IEEEhalf() &&
         !CGM.getContext().getLangOpts().NativeHalfType &&
-        !CGM.getContext().getLangOpts().HalfArgsAndReturns)
+        CGM.getContext().getTargetInfo().useFP16ConversionIntrinsics())
       return llvm::ConstantInt::get(CGM.getLLVMContext(),
                                     Init.bitcastToAPInt());
     else
Index: lib/Basic/Targets/X86.h
===================================================================
--- lib/Basic/Targets/X86.h
+++ lib/Basic/Targets/X86.h
@@ -361,6 +361,10 @@
     return "";
   }
 
+  bool useFP16ConversionIntrinsics() const override {
+    return false;
+  }
+
   void getTargetDefines(const LangOptions &Opts,
                         MacroBuilder &Builder) const override;
   
Index: lib/Basic/Targets/ARM.h
===================================================================
--- lib/Basic/Targets/ARM.h
+++ lib/Basic/Targets/ARM.h
@@ -126,6 +126,10 @@
 
   bool setFPMath(StringRef Name) override;
 
+  bool useFP16ConversionIntrinsics() const override {
+    return false;
+  }
+
   void getTargetDefinesARMV81A(const LangOptions &Opts,
                                MacroBuilder &Builder) const;
 
Index: lib/Basic/Targets/AArch64.h
===================================================================
--- lib/Basic/Targets/AArch64.h
+++ lib/Basic/Targets/AArch64.h
@@ -48,6 +48,10 @@
   bool isValidCPUName(StringRef Name) const override;
   bool setCPU(const std::string &Name) override;
 
+  bool useFP16ConversionIntrinsics() const override {
+    return false;
+  }
+
   void getTargetDefinesARMV81A(const LangOptions &Opts,
                                MacroBuilder &Builder) const;
   void getTargetDefinesARMV82A(const LangOptions &Opts,
Index: include/clang/Basic/TargetInfo.h
===================================================================
--- include/clang/Basic/TargetInfo.h
+++ include/clang/Basic/TargetInfo.h
@@ -559,6 +559,13 @@
     return ComplexLongDoubleUsesFP2Ret;
   }
 
+  /// Check whether llvm intrinsics such as llvm.convert.to.fp16 should be used
+  /// to convert to and from __fp16. This function should be removed once all
+  /// targets stop using the conversion intrinsics.
+  virtual bool useFP16ConversionIntrinsics() const {
+    return true;
+  }
+
   /// \brief Specify if mangling based on address space map should be used or
   /// not for language specific address spaces
   bool useAddressSpaceMapMangling() const {

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D40112: [CodeGen][X86] Fix handling of __fp16 vectors

Reply via email to