zahiraam updated this revision to Diff 441167.
zahiraam marked 2 inline comments as done.
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D113107/new/
https://reviews.llvm.org/D113107
Files:
clang/docs/LanguageExtensions.rst
clang/docs/ReleaseNotes.rst
clang/include/clang/Basic/TargetInfo.h
clang/lib/Basic/Targets/X86.cpp
clang/lib/Basic/Targets/X86.h
clang/lib/CodeGen/CGExprComplex.cpp
clang/lib/CodeGen/CGExprScalar.cpp
clang/lib/CodeGen/CodeGenFunction.h
clang/test/CodeGen/X86/Float16-arithmetic.c
clang/test/CodeGen/X86/Float16-complex.c
clang/test/Sema/Float16.c
clang/test/SemaCXX/Float16.cpp
Index: clang/test/SemaCXX/Float16.cpp
===================================================================
--- clang/test/SemaCXX/Float16.cpp
+++ clang/test/SemaCXX/Float16.cpp
@@ -1,20 +1,10 @@
-// RUN: %clang_cc1 -fsyntax-only -verify -triple i686-linux-pc %s
-// RUN: %clang_cc1 -fsyntax-only -verify -triple i686-linux-pc -target-feature +sse2 %s -DHAVE
-// RUN: %clang_cc1 -fsyntax-only -verify -triple x86_64-linux-pc %s -DHAVE
-// RUN: %clang_cc1 -fsyntax-only -verify -triple spir-unknown-unknown %s -DHAVE
-// RUN: %clang_cc1 -fsyntax-only -verify -triple armv7a-linux-gnu %s -DHAVE
-// RUN: %clang_cc1 -fsyntax-only -verify -triple aarch64-linux-gnu %s -DHAVE
+// RUN: %clang_cc1 -fsyntax-only -verify -triple x86_64-linux-pc %s
+// RUN: %clang_cc1 -fsyntax-only -verify -triple spir-unknown-unknown %s
+// RUN: %clang_cc1 -fsyntax-only -verify -triple armv7a-linux-gnu %s
+// RUN: %clang_cc1 -fsyntax-only -verify -triple aarch64-linux-gnu %s
-#ifdef HAVE
// expected-no-diagnostics
-#endif // HAVE
-#ifndef HAVE
-// expected-error@+2{{_Float16 is not supported on this target}}
-#endif // !HAVE
_Float16 f;
-#ifndef HAVE
-// expected-error@+2{{invalid suffix 'F16' on floating constant}}
-#endif // !HAVE
const auto g = 1.1F16;
Index: clang/test/Sema/Float16.c
===================================================================
--- clang/test/Sema/Float16.c
+++ clang/test/Sema/Float16.c
@@ -1,19 +1,11 @@
-// RUN: %clang_cc1 -fsyntax-only -verify -triple i686-linux-pc %s
-// RUN: %clang_cc1 -fsyntax-only -verify -triple i686-linux-pc -target-feature +sse2 %s -DHAVE
-// RUN: %clang_cc1 -fsyntax-only -verify -triple x86_64-linux-pc %s -DHAVE
-// RUN: %clang_cc1 -fsyntax-only -verify -triple spir-unknown-unknown %s -DHAVE
-// RUN: %clang_cc1 -fsyntax-only -verify -triple armv7a-linux-gnu %s -DHAVE
-// RUN: %clang_cc1 -fsyntax-only -verify -triple aarch64-linux-gnu %s -DHAVE
+// RUN: %clang_cc1 -fsyntax-only -verify -triple x86_64-linux-pc %s
+// RUN: %clang_cc1 -fsyntax-only -verify -triple x86_64-linux-pc -target-feature +avx512fp16 %s
+// RUN: %clang_cc1 -fsyntax-only -verify -triple spir-unknown-unknown %s
+// RUN: %clang_cc1 -fsyntax-only -verify -triple armv7a-linux-gnu %s
+// RUN: %clang_cc1 -fsyntax-only -verify -triple aarch64-linux-gnu %s
-#ifndef HAVE
-// expected-error@+2{{_Float16 is not supported on this target}}
-#endif // HAVE
-_Float16 f;
-
-#ifdef HAVE
_Complex _Float16 a;
void builtin_complex(void) {
_Float16 a = 0;
(void)__builtin_complex(a, a); // expected-error {{'_Complex _Float16' is invalid}}
}
-#endif
Index: clang/test/CodeGen/X86/Float16-complex.c
===================================================================
--- clang/test/CodeGen/X86/Float16-complex.c
+++ clang/test/CodeGen/X86/Float16-complex.c
@@ -1,134 +1,407 @@
-// RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-unknown-unknown -target-feature +avx512fp16 -o - | FileCheck %s --check-prefix=X86
-// RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-unknown-unknown -o - | FileCheck %s --check-prefix=X86
+// RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-unknown-unknown -target-feature +avx512fp16 -o - | FileCheck %s --check-prefixes=CHECK,AVX
+// RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-unknown-unknown -o - | FileCheck %s --check-prefixes=CHECK,X86
_Float16 _Complex add_half_rr(_Float16 a, _Float16 b) {
- // X86-LABEL: @add_half_rr(
- // X86: fadd
- // X86-NOT: fadd
- // X86: ret
+ // CHECK-LABEL: @add_half_rr(
+ // CHECK: [[A:%.*]] = alloca half
+ // CHECK-NEXT: [[B:%.*]] = alloca half
+ // CHECK: [[A_LOAD:%.*]] = load half, ptr [[A]]
+
+ // AVX-NEXT: [[B_LOAD:%.*]] = load half, ptr [[B]]
+ // AVX-NEXT: [[AB_ADD:%.*]] = fadd half [[A_LOAD]], [[B_LOAD]]
+ // AVX: store half [[AB_ADD]], {{.*}}
+
+ // X86-NEXT: [[A_EXT:%.*]] = fpext half [[A_LOAD]] to float
+ // X86-NEXT: [[B_LOAD:%.*]] = load half, ptr [[B]]
+ // X86-NEXT: [[B_EXT:%.*]] = fpext half [[B_LOAD]] to float
+ // X86-NEXT: [[AB_ADD:%.*]] = fadd float [[A_EXT]], [[B_EXT]]
+ // X86-NEXT: [[AB_ADD_TRUNC:%.*]] = fptrunc float [[AB_ADD]] to half
+ // X86: store half [[AB_ADD_TRUNC]], {{.*}}
return a + b;
}
+
_Float16 _Complex add_half_cr(_Float16 _Complex a, _Float16 b) {
- // X86-LABEL: @add_half_cr(
- // X86: fadd
- // X86-NOT: fadd
- // X86: ret
+ // CHECK-LABEL: @add_half_cr(
+ // CHECK: [[B:%.*]] = alloca half
+ // CHECK: [[AR:%.*]] = load half, ptr {{.*}}
+ // CHECK: [[AI:%.*]] = load half, ptr {{.*}}
+
+ // AVX: [[A_LOAD:%.*]] = load half, ptr [[B]]
+ // AVX: [[AB_ADD:%.*]] = fadd half [[AR]], [[A_LOAD]]
+ // AVX: store half [[AB_ADD]], ptr {{.*}}
+ // AVX: store half [[AI]], ptr {{.*}}
+
+ // X86-NEXT: [[AR_EXT:%.*]] = fpext half [[AR]] to float
+ // X86-NEXT: [[AI_EXT:%.*]] = fpext half [[AI]] to float
+ // X86-NEXT: [[B_LOAD:%.*]] = load half, ptr [[B]]
+ // X86-NEXT: [[B_EXT:%.*]] = fpext half [[B_LOAD]] to float
+ // X86-NEXT: [[AB_ADD:%.*]] = fadd float [[AR_EXT]], [[B_EXT]]
+ // X86: [[AB_ADD_TRUNC:%.*]] = fptrunc float [[AB_ADD]] to half
+ // X86-NEXT: [[AI_TRUNC:%.*]] = fptrunc float [[AI_EXT]] to half
+ // X86: store half [[AB_ADD_TRUNC]], ptr {{.*}}
+ // X86-NEXT: store half [[AI_TRUNC]], ptr {{.*}}
return a + b;
}
+
_Float16 _Complex add_half_rc(_Float16 a, _Float16 _Complex b) {
- // X86-LABEL: @add_half_rc(
- // X86: fadd
- // X86-NOT: fadd
- // X86: ret
+ // CHECK-LABEL: @add_half_rc(
+
+ // CHECK: [[A:%.*]] = alloca half
+ // CHECK: store half %a, ptr [[A]]
+ // CHECK-NEXT: [[A_LOAD:%.*]] = load half, ptr [[A]]
+
+ // AVX: [[BR_LOAD:%.*]] = load half, ptr {{.*}}
+ // AVX: [[BI_LOAD:%.*]] = load half, ptr {{.*}}
+ // AVX: [[AB_ADD:%.*]] = fadd half [[A_LOAD]], [[BR_LOAD]]
+ // AVX: store half [[AB_ADD]], ptr {{.*}}
+ // AVX-NEXT: store half [[BI_LOAD]], ptr {{.*}}
+
+ // X86: [[A_EXT:%.*]] = fpext half [[A_LOAD]] to float
+ // X86: [[BR_LOAD:%.*]] = load half, ptr {{.*}}
+ // X86: [[BI_LOAD:%.*]] = load half, ptr {{.*}}
+ // X86-NEXT: [[BR_EXT:%.*]] = fpext half [[BR_LOAD]] to float
+ // X86-NEXT: [[BI_EXT:%.*]] = fpext half [[BI_LOAD]] to float
+ // X86-NEXT: [[AB_ADD:%.*]] = fadd float [[A_EXT]], [[BR_EXT]]
+ // X86-NEXT: [[AB_ADD_TRUNC:%.*]] = fptrunc float [[AB_ADD]] to half
+ // X86-NEXT: [[BI_TRUNC:%.*]] = fptrunc float [[BI_EXT]] to half
+ // X86: store half [[AB_ADD_TRUNC]], ptr {{.*}}
+ // X86-NEXT: store half [[BI_TRUNC]], ptr {{.*}}
return a + b;
}
+
_Float16 _Complex add_half_cc(_Float16 _Complex a, _Float16 _Complex b) {
- // X86-LABEL: @add_half_cc(
- // X86: fadd
- // X86: fadd
- // X86-NOT: fadd
- // X86: ret
+ // CHECK-LABEL: @add_half_cc(
+
+ // CHECK: [[AR_LOAD:%.*]] = load half, ptr {{.*}}
+ // CHECK: [[AI_LOAD:%.*]] = load half, ptr {{.*}}
+
+ // AVX: [[BR_LOAD:%.*]] = load half, ptr {{.*}}
+ // AVX: [[BI_LOAD:%.*]] = load half, ptr {{.*}}
+ // AVX-NEXT: [[AB_ADDR:%.*]] = fadd half [[AR_LOAD]], [[BR_LOAD]]
+ // AVX-NEXT: [[AB_ADDI:%.*]] = fadd half [[AI_LOAD]], [[BI_LOAD]]
+ // AVX: store half [[AB_ADDR]], ptr {{.*}}
+ // AVX-NEXT: store half [[AB_ADDI]], ptr {{.*}}
+
+ // X86: [[AR_LOAD_EXT:%.*]] = fpext half [[AR_LOAD]] to float
+ // X86: [[AI_LOAD_EXT:%.*]] = fpext half [[AI_LOAD]] to float
+ // X86: [[BR_LOAD:%.*]] = load half, ptr {{.*}}
+ // X86: [[BI_LOAD:%.*]] = load half, ptr {{.*}}
+ // X86-NEXT: [[BR_EXT:%.*]] = fpext half [[BR_LOAD]] to float
+ // X86-NEXT: [[BI_EXT:%.*]] = fpext half [[BI_LOAD]] to float
+ // X86-NEXT: [[AB_ADDR:%.*]] = fadd float [[AR_LOAD_EXT]], [[BR_EXT]]
+ // X86-NEXT: [[AB_ADDI:%.*]] = fadd float [[AI_LOAD_EXT]], [[BI_EXT]]
+ // X86-NEXT: [[AB_ADDR_TRUNC:%.*]] = fptrunc float [[AB_ADDR]] to half
+ // X86-NEXT: [[AB_ADDI_TRUNC:%.*]] = fptrunc float [[AB_ADDI]] to half
+ // X86: store half [[AB_ADDR_TRUNC]], ptr {{.*}}
+ // X86-NEXT: store half [[AB_ADDI_TRUNC]], ptr {{.*}}
return a + b;
}
_Float16 _Complex sub_half_rr(_Float16 a, _Float16 b) {
- // X86-LABEL: @sub_half_rr(
- // X86: fsub
- // X86-NOT: fsub
- // X86: ret
+ // CHECK-LABEL: @sub_half_rr(
+ // CHECK: [[A:%.*]] = alloca half
+ // CHECK-NEXT: [[B:%.*]] = alloca half
+ // CHECK: [[A_LOAD:%.*]] = load half, ptr [[A]]
+
+ // AVX-NEXT: [[B_LOAD:%.*]] = load half, ptr [[B]]
+ // AVX-NEXT: [[AB_SUB:%.*]] = fsub half [[A_LOAD]], [[B_LOAD]]
+ // AVX: store half [[AB_SUB]], {{.*}}
+ // AVX-NEXT: store half 0xH0000, {{.*}}
+
+ // X86-NEXT: [[A_EXT:%.*]] = fpext half [[A_LOAD]] to float
+ // X86-NEXT: [[B_LOAD:%.*]] = load half, ptr [[B]]
+ // X86-NEXT: [[B_EXT:%.*]] = fpext half [[B_LOAD]] to float
+ // X86-NEXT: [[AB_SUB:%.*]] = fsub float [[A_EXT]], [[B_EXT]]
+ // X86-NEXT: [[AB_SUB_TRUNC:%.*]] = fptrunc float [[AB_SUB]] to half
+ // X86: store half [[AB_SUB_TRUNC]], {{.*}}
+ // X86-NEXT: store half 0xH0000, {{.*}}
return a - b;
}
+
_Float16 _Complex sub_half_cr(_Float16 _Complex a, _Float16 b) {
- // X86-LABEL: @sub_half_cr(
- // X86: fsub
- // X86-NOT: fsub
- // X86: ret
+ // CHECK-LABEL: @sub_half_cr(
+ // CHECK: [[B:%.*]] = alloca half
+ // CHECK: [[AR:%.*]] = load half, ptr {{.*}}
+ // CHECK: [[AI:%.*]] = load half, ptr {{.*}}
+
+ // AVX: [[A_LOAD:%.*]] = load half, ptr [[B]]
+ // AVX: [[AB_SUB:%.*]] = fsub half [[AR]], [[A_LOAD]]
+ // AVX: store half [[AB_SUB]], ptr {{.*}}
+ // AVX: store half [[AI]], ptr {{.*}}
+
+ // X86-NEXT: [[AR_EXT:%.*]] = fpext half [[AR]] to float
+ // X86-NEXT: [[AI_EXT:%.*]] = fpext half [[AI]] to float
+ // X86-NEXT: [[B_LOAD:%.*]] = load half, ptr [[B]]
+ // X86-NEXT: [[B_EXT:%.*]] = fpext half [[B_LOAD]] to float
+ // X86-NEXT: [[AB_SUB:%.*]] = fsub float [[AR_EXT]], [[B_EXT]]
+ // X86-NEXT: fptrunc float [[AB_SUB]] to half
+ // X86-NEXT: fptrunc float [[AI_EXT]] to half
return a - b;
}
+
_Float16 _Complex sub_half_rc(_Float16 a, _Float16 _Complex b) {
- // X86-LABEL: @sub_half_rc(
- // X86: fsub
- // X86: fneg
- // X86-NOT: fsub
- // X86: ret
+ // CHECK-LABEL: @sub_half_rc(
+
+ // CHECK: [[A:%.*]] = alloca half
+ // CHECK: store half {{.*}}, ptr [[A]]
+ // CHECK-NEXT: [[A_LOAD:%.*]] = load half, ptr [[A]]
+
+ // AVX: [[BR_LOAD:%.*]] = load half, ptr {{.*}}
+ // AVX: [[BI_LOAD:%.*]] = load half, ptr {{.*}}
+ // AVX: [[AB_SUBR:%.*]] = fsub half [[A_LOAD]], [[BR_LOAD]]
+ // AVX: [[AB_SUBI:%.*]] = fneg half {{.*}}
+ // AVX: store half [[AB_SUBR]], ptr {{.*}}
+ // AVX-NEXT: store half [[AB_SUBI]], ptr {{.*}}
+
+ // X86: [[A_EXT:%.*]] = fpext half [[A_LOAD]] to float
+ // X86: [[BR_LOAD:%.*]] = load half, ptr {{.*}}
+ // X86: [[BI_LOAD:%.*]] = load half, ptr {{.*}}
+ // X86-NEXT: [[BR_EXT:%.*]] = fpext half [[BR_LOAD]] to float
+ // X86-NEXT: [[BI_EXT:%.*]] = fpext half [[BI_LOAD]] to float
+ // X86-NEXT: [[AB_SUBR:%.*]] = fsub float [[A_EXT]], [[BR_EXT]]
+ // X86-NEXT: [[AB_SUBI:%.*]] = fneg float [[BI_EXT]]
+ // X86-NEXT: [[AB_SUBR_TRUNC:%.*]] = fptrunc float [[AB_SUBR]] to half
+ // X86-NEXT: [[AB_SUBI_TRUNC:%.*]] = fptrunc float [[AB_SUBI]] to half
+ // X86: store half [[AB_SUBR_TRUNC]], ptr {{.*}}
+ // X86-NEXT: store half [[AB_SUBI_TRUNC]], ptr {{.*}}
return a - b;
}
+
_Float16 _Complex sub_half_cc(_Float16 _Complex a, _Float16 _Complex b) {
- // X86-LABEL: @sub_half_cc(
- // X86: fsub
- // X86: fsub
- // X86-NOT: fsub
- // X86: ret
+ // CHECK-LABEL: @sub_half_cc(
+
+ // CHECK: [[AR_LOAD:%.*]] = load half, ptr {{.*}}
+ // CHECK: [[AI_LOAD:%.*]] = load half, ptr {{.*}}
+
+ // AVX: [[BR_LOAD:%.*]] = load half, ptr {{.*}}
+ // AVX: [[BI_LOAD:%.*]] = load half, ptr {{.*}}
+ // AVX-NEXT: [[AB_SUBR:%.*]] = fsub half [[AR_LOAD]], [[BR_LOAD]]
+ // AVX-NEXT: [[AB_SUBI:%.*]] = fsub half [[AI_LOAD]], [[BI_LOAD]]
+ // AVX: store half [[AB_SUBR]], ptr {{.*}}
+ // AVX-NEXT: store half [[AB_SUBI]], ptr {{.*}}
+
+ // X86: [[AR_LOAD_EXT:%.*]] = fpext half [[AR_LOAD]] to float
+ // X86: [[AI_LOAD_EXT:%.*]] = fpext half [[AI_LOAD]] to float
+ // X86: [[BR_LOAD:%.*]] = load half, ptr {{.*}}
+ // X86: [[BI_LOAD:%.*]] = load half, ptr {{.*}}
+ // X86-NEXT: [[BR_EXT:%.*]] = fpext half [[BR_LOAD]] to float
+ // X86-NEXT: [[BI_EXT:%.*]] = fpext half [[BI_LOAD]] to float
+ // X86-NEXT: [[AB_SUBR:%.*]] = fsub float [[AR_LOAD_EXT]], [[BR_EXT]]
+ // X86-NEXT: [[AB_SUBI:%.*]] = fsub float [[AI_LOAD_EXT]], [[BI_EXT]]
+ // X86-NEXT: [[AB_SUBR_TRUNC:%.*]] = fptrunc float [[AB_SUBR]] to half
+ // X86-NEXT: [[AB_SUBI_TRUNC:%.*]] = fptrunc float [[AB_SUBI]] to half
+ // X86: store half [[AB_SUBR_TRUNC]], ptr {{.*}}
+ // X86-NEXT: store half [[AB_SUBI_TRUNC]], ptr {{.*}}
return a - b;
}
_Float16 _Complex mul_half_rr(_Float16 a, _Float16 b) {
- // X86-LABEL: @mul_half_rr(
- // X86: fmul
- // X86-NOT: fmul
- // X86: ret
+ // CHECK-LABEL: @mul_half_rr(
+ // CHECK: [[A:%.*]] = alloca half
+ // CHECK-NEXT: [[B:%.*]] = alloca half
+ // CHECK: [[A_LOAD:%.*]] = load half, ptr [[A]]
+
+ // AVX-NEXT: [[B_LOAD:%.*]] = load half, ptr [[B]]
+ // AVX-NEXT: [[AB_MUL:%.*]] = fmul half [[A_LOAD]], [[B_LOAD]]
+ // AVX: store half [[AB_MUL]], {{.*}}
+ // AVX-NEXT: store half 0xH0000, {{.*}}
+
+ // X86-NEXT: [[A_EXT:%.*]] = fpext half [[A_LOAD]] to float
+ // X86-NEXT: [[B_LOAD:%.*]] = load half, ptr [[B]]
+ // X86-NEXT: [[B_EXT:%.*]] = fpext half [[B_LOAD]] to float
+ // X86-NEXT: [[AB_MUL:%.*]] = fmul float [[A_EXT]], [[B_EXT]]
+ // X86-NEXT: [[AB_MUL_TRUNC:%.*]] = fptrunc float [[AB_MUL]] to half
+ // X86: store half [[AB_MUL_TRUNC]], {{.*}}
+ // X86-NEXT: store half 0xH0000, {{.*}}
return a * b;
}
+
_Float16 _Complex mul_half_cr(_Float16 _Complex a, _Float16 b) {
-  // X86-LABEL: @mul_half_cr(
+  // CHECK-LABEL: @mul_half_cr(
- // X86: fmul
- // X86: fmul
- // X86-NOT: fmul
- // X86: ret
+ // CHECK: [[B:%.*]] = alloca half
+ // CHECK: [[AR:%.*]] = load half, ptr {{.*}}
+ // CHECK: [[AI:%.*]] = load half, ptr {{.*}}
+
+ // AVX: [[A_LOAD:%.*]] = load half, ptr [[B]]
+ // AVX: [[AB_MULR:%.*]] = fmul half [[AR]], [[A_LOAD]]
+ // AVX: [[AB_MULI:%.*]] = fmul half [[AI]], [[A_LOAD]]
+ // AVX: store half [[AB_MULR]], ptr {{.*}}
+ // AVX: store half [[AB_MULI]], ptr {{.*}}
+
+ // X86-NEXT: [[AR_EXT:%.*]] = fpext half [[AR]] to float
+ // X86-NEXT: [[AI_EXT:%.*]] = fpext half [[AI]] to float
+ // X86-NEXT: [[B_LOAD:%.*]] = load half, ptr [[B]]
+ // X86-NEXT: [[B_EXT:%.*]] = fpext half [[B_LOAD]] to float
+ // X86-NEXT: [[AB_MULR:%.*]] = fmul float [[AR_EXT]], [[B_EXT]]
+ // X86-NEXT: [[AB_MULI:%.*]] = fmul float [[AI_EXT]], [[B_EXT]]
+ // X86-NEXT: [[AB_MULR_TRUNC:%.*]] = fptrunc float [[AB_MULR]] to half
+ // X86-NEXT: [[AB_MULI_TRUNC:%.*]] = fptrunc float [[AB_MULI]] to half
+ // X86: store half [[AB_MULR_TRUNC]], ptr {{.*}}
+ // X86-NEXT: store half [[AB_MULI_TRUNC]], ptr {{.*}}
return a * b;
}
+
_Float16 _Complex mul_half_rc(_Float16 a, _Float16 _Complex b) {
- // X86-LABEL: @mul_half_rc(
- // X86: fmul
- // X86: fmul
- // X86-NOT: fmul
- // X86: ret
+ // CHECK-LABEL: @mul_half_rc(
+ // CHECK: [[A:%.*]] = alloca half
+ // CHECK: store half %a, ptr [[A]]
+ // CHECK-NEXT: [[A_LOAD:%.*]] = load half, ptr [[A]]
+
+ // AVX: [[BR_LOAD:%.*]] = load half, ptr {{.*}}
+ // AVX: [[BI_LOAD:%.*]] = load half, ptr {{.*}}
+ // AVX: [[AB_MULR:%.*]] = fmul half [[A_LOAD]], [[BR_LOAD]]
+ // AVX: [[AB_MULI:%.*]] = fmul half [[A_LOAD]], [[BI_LOAD]]
+ // AVX: store half [[AB_MULR]], ptr {{.*}}
+ // AVX-NEXT: store half [[AB_MULI]], ptr {{.*}}
+
+ // X86: [[A_EXT:%.*]] = fpext half [[A_LOAD]] to float
+ // X86: [[BR_LOAD:%.*]] = load half, ptr {{.*}}
+ // X86: [[BI_LOAD:%.*]] = load half, ptr {{.*}}
+ // X86-NEXT: [[BR_EXT:%.*]] = fpext half [[BR_LOAD]] to float
+ // X86-NEXT: [[BI_EXT:%.*]] = fpext half [[BI_LOAD]] to float
+ // X86-NEXT: [[AB_MULR:%.*]] = fmul float [[A_EXT]], [[BR_EXT]]
+ // X86-NEXT: [[AB_MULI:%.*]] = fmul float [[A_EXT]], [[BI_EXT]]
+ // X86-NEXT: [[AB_MULR_TRUNC:%.*]] = fptrunc float [[AB_MULR]] to half
+ // X86-NEXT: [[AB_MULI_TRUNC:%.*]] = fptrunc float [[AB_MULI]] to half
+ // X86: store half [[AB_MULR_TRUNC]], ptr {{.*}}
+ // X86-NEXT: store half [[AB_MULI_TRUNC]], ptr {{.*}}
return a * b;
}
+
_Float16 _Complex mul_half_cc(_Float16 _Complex a, _Float16 _Complex b) {
- // X86-LABEL: @mul_half_cc(
- // X86: %[[AC:[^ ]+]] = fmul
- // X86: %[[BD:[^ ]+]] = fmul
- // X86: %[[AD:[^ ]+]] = fmul
- // X86: %[[BC:[^ ]+]] = fmul
- // X86: %[[RR:[^ ]+]] = fsub half %[[AC]], %[[BD]]
- // X86: %[[RI:[^ ]+]] = fadd half
- // X86-DAG: %[[AD]]
- // X86-DAG: ,
- // X86-DAG: %[[BC]]
- // X86: fcmp uno half %[[RR]]
- // X86: fcmp uno half %[[RI]]
- // X86: call {{.*}} @__mulhc3(
- // X86: ret
+  // CHECK-LABEL: @mul_half_cc(
+ // CHECK: [[AR_LOAD:%.*]] = load half, ptr {{.*}}
+ // CHECK: [[AI_LOAD:%.*]] = load half, ptr {{.*}}
+
+ // AVX: [[BR_LOAD:%.*]] = load half, ptr {{.*}}
+ // AVX: [[BI_LOAD:%.*]] = load half, ptr {{.*}}
+ // AVX-NEXT: [[AC:%.*]] = fmul half [[AR_LOAD]], [[BR_LOAD]]
+ // AVX-NEXT: [[BD:%.*]] = fmul half [[AI_LOAD]], [[BI_LOAD]]
+ // AVX-NEXT: [[AD:%.*]] = fmul half [[AR_LOAD]], [[BI_LOAD]]
+ // AVX-NEXT: [[BC:%.*]] = fmul half [[AI_LOAD]], [[BR_LOAD]]
+ // AVX: call <2 x half> @__mulhc3(
+
+ // X86: [[AR_EXT:%.*]] = fpext half [[AR_LOAD]] to float
+ // X86: [[AI_EXT:%.*]] = fpext half [[AI_LOAD]] to float
+ // X86: [[BR_LOAD:%.*]] = load half, ptr {{.*}}
+ // X86: [[BI_LOAD:%.*]] = load half, ptr {{.*}}
+ // X86-NEXT: [[BR_EXT:%.*]] = fpext half [[BR_LOAD]] to float
+ // X86-NEXT: [[BI_EXT:%.*]] = fpext half [[BI_LOAD]] to float
+ // X86-NEXT: [[AC:%.*]] = fmul float [[AR_EXT]], [[BR_EXT]]
+ // X86-NEXT: [[BD:%.*]] = fmul float [[AI_EXT]], [[BI_EXT]]
+ // X86-NEXT: [[AD:%.*]] = fmul float [[AR_EXT]], [[BI_EXT]]
+ // X86-NEXT: [[BC:%.*]] = fmul float [[AI_EXT]], [[BR_EXT]]
+ // X86: call <2 x float> @__mulsc3(
return a * b;
}
-
_Float16 _Complex div_half_rr(_Float16 a, _Float16 b) {
- // X86-LABEL: @div_half_rr(
- // X86: fdiv
- // X86-NOT: fdiv
- // X86: ret
+  // CHECK-LABEL: @div_half_rr(
+
+ // CHECK: [[A:%.*]] = alloca half
+ // CHECK-NEXT: [[B:%.*]] = alloca half
+ // CHECK: [[A_LOAD:%.*]] = load half, ptr [[A]]
+
+ // AVX-NEXT: [[B_LOAD:%.*]] = load half, ptr [[B]]
+ // AVX-NEXT: [[AB_DIV:%.*]] = fdiv half [[A_LOAD]], [[B_LOAD]]
+ // AVX: store half [[AB_DIV]], {{.*}}
+
+ // X86-NEXT: [[A_EXT:%.*]] = fpext half [[A_LOAD]] to float
+ // X86-NEXT: [[B_LOAD:%.*]] = load half, ptr [[B]]
+ // X86-NEXT: [[B_EXT:%.*]] = fpext half [[B_LOAD]] to float
+ // X86-NEXT: [[AB_DIV:%.*]] = fdiv float [[A_EXT]], [[B_EXT]]
+ // X86-NEXT: [[AB_DIV_TRUNC:%.*]] = fptrunc float [[AB_DIV]] to half
+ // X86: store half [[AB_DIV_TRUNC]], {{.*}}
return a / b;
}
+
_Float16 _Complex div_half_cr(_Float16 _Complex a, _Float16 b) {
- // X86-LABEL: @div_half_cr(
- // X86: fdiv
- // X86: fdiv
- // X86-NOT: fdiv
- // X86: ret
+ // CHECK-LABEL: @div_half_cr(
+ // CHECK: [[B:%.*]] = alloca half
+ // CHECK: [[AR:%.*]] = load half, ptr {{.*}}
+ // CHECK: [[AI:%.*]] = load half, ptr {{.*}}
+
+ // AVX: [[A_LOAD:%.*]] = load half, ptr [[B]]
+ // AVX: [[AB_DIVR:%.*]] = fdiv half [[AR]], [[A_LOAD]]
+ // AVX: [[AB_DIVI:%.*]] = fdiv half [[AI]], [[A_LOAD]]
+ // AVX: store half [[AB_DIVR]], ptr {{.*}}
+ // AVX-NEXT: store half [[AB_DIVI]], ptr {{.*}}
+
+ // X86-NEXT: [[AR_EXT:%.*]] = fpext half [[AR]] to float
+ // X86-NEXT: [[AI_EXT:%.*]] = fpext half [[AI]] to float
+ // X86-NEXT: [[B_LOAD:%.*]] = load half, ptr [[B]]
+ // X86-NEXT: [[B_EXT:%.*]] = fpext half [[B_LOAD]] to float
+ // X86-NEXT: [[AB_DIVR:%.*]] = fdiv float [[AR_EXT]], [[B_EXT]]
+ // X86-NEXT: [[AB_DIVI:%.*]] = fdiv float [[AI_EXT]], [[B_EXT]]
+ // X86-NEXT: [[AB_DIVR_TRUNC:%.*]] = fptrunc float [[AB_DIVR]] to half
+ // X86-NEXT: [[AB_DIVI_TRUNC:%.*]] = fptrunc float [[AB_DIVI]] to half
+ // X86: store half [[AB_DIVR_TRUNC]], ptr {{.*}}
+ // X86-NEXT: store half [[AB_DIVI_TRUNC]], ptr {{.*}}
return a / b;
}
_Float16 _Complex div_half_rc(_Float16 a, _Float16 _Complex b) {
- // X86-LABEL: @div_half_rc(
- // X86-NOT: fdiv
- // X86: call {{.*}} @__divhc3(
- // X86: ret
+ // CHECK-LABEL: @div_half_rc(
+ // CHECK: [[A:%.*]] = alloca half
+ // CHECK: store half %a, ptr [[A]]
+ // CHECK-NEXT: [[A_LOAD:%.*]] = load half, ptr [[A]]
+ // AVX: [[BR_LOAD:%.*]] = load half, ptr {{.*}}
+ // AVX: [[BI_LOAD:%.*]] = load half, ptr {{.*}}
+ // AVX: call <2 x half> @__divhc3(half {{.*}} [[A_LOAD]],
+ // AVX-DAG: half {{.*}} [[BR_LOAD]],
+ // AVX-DAG: half {{.*}} [[BI_LOAD]])
+
+ // X86: [[A_EXT:%.*]] = fpext half {{.*}} to float
+ // X86: [[BR_LOAD:%.*]] = load half, ptr {{.*}}
+ // X86: [[BI_LOAD:%.*]] = load half, ptr {{.*}}
+ // X86-NEXT: [[BR_EXT:%.*]] = fpext half [[BR_LOAD]] to float
+ // X86-NEXT: [[BI_EXT:%.*]] = fpext half [[BI_LOAD]] to float
+ // X86-DAG: call <2 x float> @__divsc3(float {{.*}} [[A_EXT]],
+ // X86-DAG: float {{.*}} [[BR_EXT]],
+ // X86-DAG: float {{.*}} [[BI_EXT]])
return a / b;
}
+
_Float16 _Complex div_half_cc(_Float16 _Complex a, _Float16 _Complex b) {
- // X86-LABEL: @div_half_cc(
- // X86-NOT: fdiv
- // X86: call {{.*}} @__divhc3(
- // X86: ret
+ // CHECK-LABEL: @div_half_cc(
+
+ // CHECK: [[AR_LOAD:%.*]] = load half, ptr {{.*}}
+ // CHECK: [[AI_LOAD:%.*]] = load half, ptr {{.*}}
+ // AVX: [[BR_LOAD:%.*]] = load half, ptr {{.*}}
+ // AVX: [[BI_LOAD:%.*]] = load half, ptr {{.*}}
+
+ // AVX: call <2 x half> @__divhc3(half {{.*}} [[AR_LOAD]],
+ // AVX-DAG: half {{.*}} [[AI_LOAD]],
+ // AVX-DAG: half {{.*}} [[BR_LOAD]],
+ // AVX-DAG: half {{.*}} [[BI_LOAD]])
+
+ // X86: [[AR_EXT:%.*]] = fpext half [[AR_LOAD]] to float
+ // X86-NEXT: [[AI_EXT:%.*]] = fpext half [[AI_LOAD]] to float
+ // X86: [[BR_LOAD:%.*]] = load half, ptr {{.*}}
+ // X86: [[BI_LOAD:%.*]] = load half, ptr {{.*}}
+
+ // X86: [[BR_EXT:%.*]] = fpext half [[BR_LOAD]] to float
+ // X86-NEXT: [[BI_EXT:%.*]] = fpext half [[BI_LOAD]] to float
+ // X86: call <2 x float> @__divsc3(float {{.*}} [[AR_EXT]],
+ // X86-DAG: float {{.*}} [[AI_EXT]],
+ // X86-DAG: float {{.*}} [[BR_EXT]],
+ // X86-DAG: float {{.*}} [[BI_EXT]])
return a / b;
}
+
+_Float16 _Complex addcompound_half_rrr(_Float16 a, _Float16 c) {
+ // CHECK-LABEL: @addcompound_half_rrr
+
+ // AVX: [[A_LOAD:%.*]] = load half, ptr {{.*}}
+ // AVX-NEXT: [[C_LOAD:%.*]] = load half, ptr {{.*}}
+ // AVX-NEXT: [[AC_ADD:%.*]] = fadd half [[C_LOAD]], [[A_LOAD]]
+ // AVX-NEXT: store half [[AC_ADD]], ptr {{.*}}
+
+ // X86: [[A_LOAD:%.*]] = load half, ptr {{.*}}
+  // X86-NEXT: [[A_EXT:%.*]] = fpext half [[A_LOAD]] to float
+ // X86-NEXT: [[B_LOAD:%.*]] = load half, ptr {{.*}}
+ // X86-NEXT: [[B_EXT:%.*]] = fpext half [[B_LOAD]] to float
+ // X86-NEXT: [[AC_ADD:%.*]] = fadd float [[B_EXT]], [[A_EXT]]
+ // X86-NEXT: [[AC_ADD_TRUNC:%.*]] = fptrunc float [[AC_ADD]] to half
+ // X86-NEXT: store half [[AC_ADD_TRUNC]], ptr {{.*}}
+ c += a;
+ return c;
+}
Index: clang/test/CodeGen/X86/Float16-arithmetic.c
===================================================================
--- clang/test/CodeGen/X86/Float16-arithmetic.c
+++ clang/test/CodeGen/X86/Float16-arithmetic.c
@@ -1,29 +1,121 @@
// RUN: %clang_cc1 -triple x86_64-unknown-unknown \
// RUN: -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK
-// CHECK-NOT: fpext
-// CHECK-NOT: fptrunc
-
_Float16 add1(_Float16 a, _Float16 b) {
+ // CHECK-LABEL: define {{.*}} half @add1
+ // CHECK: [[A:%.*]] = alloca half
+ // CHECK-NEXT: [[B:%.*]] = alloca half
+ // CHECK: [[A_LOAD:%.*]] = load half, ptr [[A]]
+ // CHECK-NEXT: [[A_EXT:%.*]] = fpext half [[A_LOAD]] to float
+ // CHECK-NEXT: [[B_LOAD:%.*]] = load half, ptr [[B]]
+ // CHECK-NEXT: [[B_EXT:%.*]] = fpext half [[B_LOAD]] to float
+ // CHECK-NEXT: [[AB_ADD:%.*]] = fadd float [[A_EXT]], [[B_EXT]]
+ // CHECK-NEXT: [[AB_ADD_TRUNC:%.*]] = fptrunc float [[AB_ADD]] to half
+ // CHECK: ret half [[AB_ADD_TRUNC]]
return a + b;
}
_Float16 add2(_Float16 a, _Float16 b, _Float16 c) {
+ // CHECK-LABEL: define {{.*}} half @add2
+ // CHECK: [[A:%.*]] = alloca half
+ // CHECK-NEXT: [[B:%.*]] = alloca half
+ // CHECK: [[C:%.*]] = alloca half
+ // CHECK: [[A_LOAD:%.*]] = load half, ptr [[A]]
+ // CHECK-NEXT: [[A_EXT:%.*]] = fpext half [[A_LOAD]] to float
+  // CHECK: [[B_LOAD:%.*]] = load half, ptr [[B]]
+ // CHECK-NEXT: [[B_EXT:%.*]] = fpext half [[B_LOAD]] to float
+ // CHECK-NEXT: [[AB_ADD:%.*]] = fadd float [[A_EXT]], [[B_EXT]]
+ // CHECK-NEXT: [[C_LOAD:%.*]] = load half, ptr [[C]]
+ // CHECK-NEXT: [[C_EXT:%.*]] = fpext half [[C_LOAD]] to float
+ // CHECK-NEXT: [[ABC_ADD:%.*]] = fadd float [[AB_ADD]], [[C_EXT]]
+ // CHECK-NEXT: [[ABC_ADD_TRUNC:%.*]] = fptrunc float [[ABC_ADD]] to half
+ // CHECK: ret half [[ABC_ADD_TRUNC]]
return a + b + c;
}
_Float16 div(_Float16 a, _Float16 b) {
+ // CHECK-LABEL: define dso_local half @div
+ // CHECK: [[A:%.*]] = alloca half
+ // CHECK-NEXT: [[B:%.*]] = alloca half
+ // CHECK: [[A_LOAD:%.*]] = load half, ptr [[A]]
+ // CHECK-NEXT: [[A_EXT:%.*]] = fpext half [[A_LOAD]] to float
+ // CHECK-NEXT: [[B_LOAD:%.*]] = load half, ptr [[B]]
+ // CHECK-NEXT: [[B_EXT:%.*]] = fpext half [[B_LOAD]] to float
+ // CHECK-NEXT: [[AB_DIV:%.*]] = fdiv float [[A_EXT]], [[B_EXT]]
+ // CHECK-NEXT: [[AB_DIV_TRUNC:%.*]] = fptrunc float [[AB_DIV]] to half
+ // CHECK: ret half [[AB_DIV_TRUNC]]
return a / b;
}
_Float16 mul(_Float16 a, _Float16 b) {
+ // CHECK-LABEL: define dso_local half @mul
+ // CHECK: [[A:%.*]] = alloca half
+ // CHECK-NEXT: [[B:%.*]] = alloca half
+ // CHECK: [[A_LOAD:%.*]] = load half, ptr [[A]]
+ // CHECK-NEXT: [[A_EXT:%.*]] = fpext half [[A_LOAD]] to float
+ // CHECK-NEXT: [[B_LOAD:%.*]] = load half, ptr [[B]]
+ // CHECK-NEXT: [[B_EXT:%.*]] = fpext half [[B_LOAD]] to float
+ // CHECK-NEXT: [[AB_MUL:%.*]] = fmul float [[A_EXT]], [[B_EXT]]
+ // CHECK-NEXT: [[AB_MUL_TRUNC:%.*]] = fptrunc float [[AB_MUL]] to half
+ // CHECK: ret half [[AB_MUL_TRUNC]]
return a * b;
}
_Float16 add_and_mul1(_Float16 a, _Float16 b, _Float16 c, _Float16 d) {
+ // CHECK-LABEL: define dso_local half @add_and_mul1
+ // CHECK: [[A:%.*]] = alloca half
+ // CHECK-NEXT: [[B:%.*]] = alloca half
+ // CHECK-NEXT: [[C:%.*]] = alloca half
+ // CHECK: [[D:%.*]] = alloca half
+ // CHECK: [[A_LOAD:%.*]] = load half, ptr [[A]]
+ // CHECK-NEXT: [[A_EXT:%.*]] = fpext half [[A_LOAD]] to float
+  // CHECK-NEXT: [[B_LOAD:%.*]] = load half, ptr [[B]]
+ // CHECK-NEXT: [[B_EXT:%.*]] = fpext half [[B_LOAD]] to float
+ // CHECK-NEXT: [[AB_MUL:%.*]] = fmul float [[A_EXT]], [[B_EXT]]
+ // CHECK-NEXT: [[C_LOAD:%.*]] = load half, ptr [[C]]
+ // CHECK-NEXT: [[C_EXT:%.*]] = fpext half [[C_LOAD]] to float
+ // CHECK-NEXT: [[D_LOAD:%.*]] = load half, ptr [[D]]
+ // CHECK-NEXT: [[D_EXT:%.*]] = fpext half [[D_LOAD]] to float
+ // CHECK-NEXT: [[CD_MUL:%.*]] = fmul float [[C_EXT]], [[D_EXT]]
+ // CHECK-NEXT: [[ADD:%.*]] = fadd float [[AB_MUL]], [[CD_MUL]]
+ // CHECK-NEXT: [[ADD_TRUNC:%.*]] = fptrunc float [[ADD]] to half
+ // CHECK: ret half [[ADD_TRUNC]]
return a * b + c * d;
}
_Float16 add_and_mul2(_Float16 a, _Float16 b, _Float16 c, _Float16 d) {
+ // CHECK-LABEL: define dso_local half @add_and_mul2
+ // CHECK: [[A:%.*]] = alloca half
+ // CHECK-NEXT: [[B:%.*]] = alloca half
+ // CHECK-NEXT: [[C:%.*]] = alloca half
+ // CHECK: [[D:%.*]] = alloca half
+ // CHECK: [[A_LOAD:%.*]] = load half, ptr [[A]]
+ // CHECK-NEXT: [[A_EXT:%.*]] = fpext half [[A_LOAD]] to float
+ // CHECK-NEXT: [[B_LOAD:%.*]] = load half, ptr [[B]]
+ // CHECK-NEXT: [[B_EXT:%.*]] = fpext half [[B_LOAD]] to float
+ // CHECK-NEXT: [[MUL:%.*]] = fmul float 6.000000e+00, [[B_EXT]]
+ // CHECK-NEXT: [[SUB:%.*]] = fsub float [[A_EXT]], [[MUL]]
+ // CHECK-NEXT: [[SUB_TRUNC:%.*]] = fptrunc float [[SUB]] to half
+ // CHECK-NEXT: [[SUB_EXT:%.*]] = fpext half [[SUB_TRUNC]] to float
+ // CHECK-NEXT: [[C_LOAD:%.*]] = load half, ptr [[C]]
+ // CHECK-NEXT: [[C_EXT:%.*]] = fpext half [[C_LOAD]] to float
+ // CHECK-NEXT: [[ADD:%.*]] = fadd float [[SUB_EXT]], [[C_EXT]]
+  // CHECK-NEXT: [[ADD_TRUNC:%.*]] = fptrunc float [[ADD]] to half
+ // CHECK: ret half [[ADD_TRUNC]]
return (a - 6 * b) + c;
}
+
+_Float16 addcompound(_Float16 a, _Float16 c) {
+ // CHECK-LABEL: dso_local half @addcompound
+ // CHECK: [[A_LOAD:%.*]] = load half, ptr {{.*}}
+ // CHECK-NEXT: [[A_EXT:%.*]] = fpext half [[A_LOAD]] to float
+ // CHECK-NEXT: [[C_LOAD:%.*]] = load half, ptr {{.*}}
+ // CHECK-NEXT: [[C_EXT:%.*]] = fpext half [[C_LOAD]] to float
+ // CHECK-NEXT: [[AC_ADD:%.*]] = fadd float [[C_EXT]], [[A_EXT]]
+ // CHECK-NEXT: [[AC_ADD_TRUNC:%.*]] = fptrunc float [[AC_ADD]] to half
+ // CHECK-NEXT: store half [[AC_ADD_TRUNC]], ptr {{.*}}
+ // CHECK-NEXT: [[RES:%.*]] = load half, ptr {{.*}}
+ // CHECK: ret half [[RES]]
+ c += a;
+ return c;
+}
Index: clang/lib/CodeGen/CodeGenFunction.h
===================================================================
--- clang/lib/CodeGen/CodeGenFunction.h
+++ clang/lib/CodeGen/CodeGenFunction.h
@@ -4403,6 +4403,9 @@
/// EmitLoadOfComplex - Load a complex number from the specified l-value.
ComplexPairTy EmitLoadOfComplex(LValue src, SourceLocation loc);
+ ComplexPairTy EmitPromotedComplexExpr(const Expr *E, QualType DstTy);
+ llvm::Value *EmitPromotedScalarExpr(const Expr *E, QualType DstType);
+
Address emitAddrOfRealComponent(Address complex, QualType complexType);
Address emitAddrOfImagComponent(Address complex, QualType complexType);
Index: clang/lib/CodeGen/CGExprScalar.cpp
===================================================================
--- clang/lib/CodeGen/CGExprScalar.cpp
+++ clang/lib/CodeGen/CGExprScalar.cpp
@@ -791,21 +791,47 @@
// Helper functions for fixed point binary operations.
Value *EmitFixedPointBinOp(const BinOpInfo &Ops);
- BinOpInfo EmitBinOps(const BinaryOperator *E);
- LValue EmitCompoundAssignLValue(const CompoundAssignOperator *E,
- Value *(ScalarExprEmitter::*F)(const BinOpInfo &),
- Value *&Result);
+ BinOpInfo EmitBinOps(const BinaryOperator *E,
+ QualType PromotionTy = QualType());
+
+ Value *EmitPromoted(const Expr *E, QualType PromotionTy);
+
+ LValue EmitCompoundAssignLValue(
+ const CompoundAssignOperator *E, QualType PromotionType,
+ Value *(ScalarExprEmitter::*F)(const BinOpInfo &), Value *&Result);
Value *EmitCompoundAssign(const CompoundAssignOperator *E,
+ QualType PromotionTy,
Value *(ScalarExprEmitter::*F)(const BinOpInfo &));
+ QualType getPromotionType(const Expr *E) {
+ if (E->getType()->isFloat16Type()) {
+ if (CGF.getTarget().shouldEmitFloat16WithExcessPrecision())
+ return CGF.getContext().FloatTy;
+ }
+ return QualType();
+ }
+
// Binary operators and binary compound assignment operators.
-#define HANDLEBINOP(OP) \
- Value *VisitBin ## OP(const BinaryOperator *E) { \
- return Emit ## OP(EmitBinOps(E)); \
- } \
- Value *VisitBin ## OP ## Assign(const CompoundAssignOperator *E) { \
- return EmitCompoundAssign(E, &ScalarExprEmitter::Emit ## OP); \
+#define HANDLEBINOP(OP) \
+ Value *VisitBin##OP(const BinaryOperator *E) { \
+ QualType promotionTy = getPromotionType(E); \
+ auto result = Emit##OP(EmitBinOps(E, promotionTy)); \
+ if (result) \
+ if (!promotionTy.isNull()) \
+ result = Builder.CreateFPTrunc(result, ConvertType(E->getType()), \
+ "unpromotion"); \
+ return result; \
+ } \
+ Value *VisitBin##OP##Assign(const CompoundAssignOperator *E) { \
+ QualType promotionTy = getPromotionType(E); \
+ auto result = \
+ EmitCompoundAssign(E, promotionTy, &ScalarExprEmitter::Emit##OP); \
+ if (result) \
+ if (!promotionTy.isNull()) \
+ result = Builder.CreateFPTrunc(result, ConvertType(E->getType()), \
+ "unpromotion"); \
+ return result; \
}
HANDLEBINOP(Mul)
HANDLEBINOP(Div)
@@ -3050,12 +3076,45 @@
// Binary Operators
//===----------------------------------------------------------------------===//
-BinOpInfo ScalarExprEmitter::EmitBinOps(const BinaryOperator *E) {
+Value *ScalarExprEmitter::EmitPromoted(const Expr *E, QualType PromotionType) {
+ if (auto BO = dyn_cast<BinaryOperator>(E)) {
+ switch (BO->getOpcode()) {
+#define HANDLE_BINOP(OP) \
+ case BO_##OP: \
+ return Emit##OP(EmitBinOps(BO, PromotionType));
+ HANDLE_BINOP(Add)
+ HANDLE_BINOP(Sub)
+ HANDLE_BINOP(Mul)
+ HANDLE_BINOP(Div)
+#undef HANDLE_BINOP
+ default:
+ break;
+ }
+ } else {
+ auto result = Visit(const_cast<Expr *>(E));
+ if (result)
+ return CGF.Builder.CreateFPExt(result, ConvertType(PromotionType), "ext");
+ }
+ // fallback path
+ auto result = Visit(const_cast<Expr *>(E));
+ if (result)
+ result = CGF.Builder.CreateFPExt(result, ConvertType(E->getType()));
+ return result;
+}
+
+BinOpInfo ScalarExprEmitter::EmitBinOps(const BinaryOperator *E,
+ QualType PromotionType) {
TestAndClearIgnoreResultAssign();
BinOpInfo Result;
- Result.LHS = Visit(E->getLHS());
- Result.RHS = Visit(E->getRHS());
- Result.Ty = E->getType();
+ if (!PromotionType.isNull()) {
+ Result.LHS = CGF.EmitPromotedScalarExpr(E->getLHS(), PromotionType);
+ Result.RHS = CGF.EmitPromotedScalarExpr(E->getRHS(), PromotionType);
+ Result.Ty = PromotionType;
+ } else {
+ Result.LHS = Visit(E->getLHS());
+ Result.RHS = Visit(E->getRHS());
+ Result.Ty = E->getType();
+ }
Result.Opcode = E->getOpcode();
Result.FPFeatures = E->getFPFeaturesInEffect(CGF.getLangOpts());
Result.E = E;
@@ -3063,9 +3122,8 @@
}
LValue ScalarExprEmitter::EmitCompoundAssignLValue(
- const CompoundAssignOperator *E,
- Value *(ScalarExprEmitter::*Func)(const BinOpInfo &),
- Value *&Result) {
+ const CompoundAssignOperator *E, QualType PromotionType,
+ Value *(ScalarExprEmitter::*Func)(const BinOpInfo &), Value *&Result) {
QualType LHSTy = E->getLHS()->getType();
BinOpInfo OpInfo;
@@ -3074,8 +3132,13 @@
// Emit the RHS first. __block variables need to have the rhs evaluated
// first, plus this should improve codegen a little.
- OpInfo.RHS = Visit(E->getRHS());
- OpInfo.Ty = E->getComputationResultType();
+ if (!PromotionType.isNull()) {
+ OpInfo.RHS = CGF.EmitPromotedScalarExpr(E->getRHS(), PromotionType);
+ OpInfo.Ty = PromotionType;
+ } else {
+ OpInfo.RHS = Visit(E->getRHS());
+ OpInfo.Ty = E->getComputationResultType();
+ }
OpInfo.Opcode = E->getOpcode();
OpInfo.FPFeatures = E->getFPFeaturesInEffect(CGF.getLangOpts());
OpInfo.E = E;
@@ -3094,7 +3157,8 @@
llvm::Instruction::BinaryOps Op;
switch (OpInfo.Opcode) {
// We don't have atomicrmw operands for *, %, /, <<, >>
- case BO_MulAssign: case BO_DivAssign:
+ case BO_MulAssign:
+ case BO_DivAssign:
case BO_RemAssign:
case BO_ShlAssign:
case BO_ShrAssign:
@@ -3154,16 +3218,24 @@
CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, OpInfo.FPFeatures);
SourceLocation Loc = E->getExprLoc();
- OpInfo.LHS =
- EmitScalarConversion(OpInfo.LHS, LHSTy, E->getComputationLHSType(), Loc);
+ if (!PromotionType.isNull())
+ OpInfo.LHS =
+ EmitScalarConversion(OpInfo.LHS, LHSTy, PromotionType, E->getExprLoc());
+ else
+ OpInfo.LHS = EmitScalarConversion(OpInfo.LHS, LHSTy,
+ E->getComputationLHSType(), Loc);
// Expand the binary operator.
Result = (this->*Func)(OpInfo);
// Convert the result back to the LHS type,
// potentially with Implicit Conversion sanitizer check.
- Result = EmitScalarConversion(Result, E->getComputationResultType(), LHSTy,
- Loc, ScalarConversionOpts(CGF.SanOpts));
+ if (!PromotionType.isNull())
+ Result = EmitScalarConversion(Result, PromotionType, LHSTy, Loc,
+ ScalarConversionOpts(CGF.SanOpts));
+ else
+ Result = EmitScalarConversion(Result, E->getComputationResultType(), LHSTy,
+ Loc, ScalarConversionOpts(CGF.SanOpts));
if (atomicPHI) {
llvm::BasicBlock *curBlock = Builder.GetInsertBlock();
@@ -3193,11 +3265,12 @@
return LHSLV;
}
-Value *ScalarExprEmitter::EmitCompoundAssign(const CompoundAssignOperator *E,
- Value *(ScalarExprEmitter::*Func)(const BinOpInfo &)) {
+Value *ScalarExprEmitter::EmitCompoundAssign(
+ const CompoundAssignOperator *E, QualType PromotionTy,
+ Value *(ScalarExprEmitter::*Func)(const BinOpInfo &)) {
bool Ignore = TestAndClearIgnoreResultAssign();
Value *RHS = nullptr;
- LValue LHS = EmitCompoundAssignLValue(E, Func, RHS);
+ LValue LHS = EmitCompoundAssignLValue(E, PromotionTy, Func, RHS);
// If the result is clearly ignored, return now.
if (Ignore)
@@ -4896,6 +4969,12 @@
.EmitComplexToScalarConversion(Src, SrcTy, DstTy, Loc);
}
+Value *
+CodeGenFunction::EmitPromotedScalarExpr(const Expr *E,
+ QualType DstType) {
+ return ScalarExprEmitter(*this).EmitPromoted(E, DstType);
+}
+
llvm::Value *CodeGenFunction::
EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
@@ -4930,8 +5009,8 @@
switch (E->getOpcode()) {
#define COMPOUND_OP(Op) \
case BO_##Op##Assign: \
- return Scalar.EmitCompoundAssignLValue(E, &ScalarExprEmitter::Emit##Op, \
- Result)
+ return Scalar.EmitCompoundAssignLValue( \
+ E, Scalar.getPromotionType(E), &ScalarExprEmitter::Emit##Op, Result)
COMPOUND_OP(Mul);
COMPOUND_OP(Div);
COMPOUND_OP(Rem);
Index: clang/lib/CodeGen/CGExprComplex.cpp
===================================================================
--- clang/lib/CodeGen/CGExprComplex.cpp
+++ clang/lib/CodeGen/CGExprComplex.cpp
@@ -253,7 +253,9 @@
QualType Ty; // Computation Type.
};
- BinOpInfo EmitBinOps(const BinaryOperator *E);
+ BinOpInfo EmitBinOps(const BinaryOperator *E,
+ QualType PromotionTy = QualType());
+ ComplexPairTy EmitPromoted(const Expr *E, QualType PromotionTy);
LValue EmitCompoundAssignLValue(const CompoundAssignOperator *E,
ComplexPairTy (ComplexExprEmitter::*Func)
(const BinOpInfo &),
@@ -270,18 +272,43 @@
ComplexPairTy EmitComplexBinOpLibCall(StringRef LibCallName,
const BinOpInfo &Op);
- ComplexPairTy VisitBinAdd(const BinaryOperator *E) {
- return EmitBinAdd(EmitBinOps(E));
- }
- ComplexPairTy VisitBinSub(const BinaryOperator *E) {
- return EmitBinSub(EmitBinOps(E));
- }
- ComplexPairTy VisitBinMul(const BinaryOperator *E) {
- return EmitBinMul(EmitBinOps(E));
- }
- ComplexPairTy VisitBinDiv(const BinaryOperator *E) {
- return EmitBinDiv(EmitBinOps(E));
- }
+ QualType getPromotionType(const Expr *E) {
+ assert(E->getType()->isAnyComplexType() &&
+ "Expecting to promote a complex type!");
+ QualType ElementType =
+ E->getType()->castAs<ComplexType>()->getElementType();
+ if (ElementType->isFloat16Type())
+ if (CGF.getTarget().shouldEmitFloat16WithExcessPrecision())
+ return CGF.getContext().getComplexType(CGF.getContext().FloatTy);
+ return QualType();
+ }
+
+#define HANDLEBINOP(OP) \
+ ComplexPairTy VisitBin##OP(const BinaryOperator *E) { \
+ QualType promotionTy = getPromotionType(E); \
+ ComplexPairTy result = EmitBin##OP(EmitBinOps(E, promotionTy)); \
+ if (!promotionTy.isNull()) { \
+ if (result.first) \
+ result.first = Builder.CreateFPTrunc( \
+ result.first, \
+ CGF.ConvertType( \
+ E->getType()->castAs<ComplexType>()->getElementType()), \
+ "unpromotion"); \
+ if (result.second) \
+ result.second = Builder.CreateFPTrunc( \
+ result.second, \
+ CGF.ConvertType( \
+ E->getType()->castAs<ComplexType>()->getElementType()), \
+ "unpromotion"); \
+ } \
+ return result; \
+ }
+
+ HANDLEBINOP(Mul)
+ HANDLEBINOP(Div)
+ HANDLEBINOP(Add)
+ HANDLEBINOP(Sub)
+#undef HANDLEBINOP
ComplexPairTy VisitCXXRewrittenBinaryOperator(CXXRewrittenBinaryOperator *E) {
return Visit(E->getSemanticForm());
@@ -876,25 +903,95 @@
return ComplexPairTy(DSTr, DSTi);
}
+ComplexPairTy ComplexExprEmitter::EmitPromoted(const Expr *E,
+ QualType PromotionType) {
+ if (auto BO = dyn_cast<BinaryOperator>(E)) {
+ switch (BO->getOpcode()) {
+#define HANDLE_BINOP(OP) \
+ case BO_##OP: \
+ return EmitBin##OP(EmitBinOps(BO, PromotionType));
+ HANDLE_BINOP(Add)
+ HANDLE_BINOP(Sub)
+ HANDLE_BINOP(Mul)
+ HANDLE_BINOP(Div)
+#undef HANDLE_BINOP
+ default:
+ break;
+ }
+ } else {
+ ComplexPairTy Result = Visit(const_cast<Expr *>(E));
+ llvm::Value *Resultr = CGF.Builder.CreateFPExt(
+ Result.first,
+ CGF.ConvertType(PromotionType->castAs<ComplexType>()->getElementType()),
+ "ext");
+ llvm::Value *Resulti = CGF.Builder.CreateFPExt(
+ Result.second,
+ CGF.ConvertType(PromotionType->castAs<ComplexType>()->getElementType()),
+ "ext");
+ return ComplexPairTy(Resultr, Resulti);
+ }
+ // fallback path
+ ComplexPairTy Result = Visit(const_cast<Expr *>(E));
+ llvm::Value *Resultr = CGF.Builder.CreateFPExt(
+ Result.first,
+ CGF.ConvertType(E->getType()->castAs<ComplexType>()->getElementType()));
+ llvm::Value *Resulti = CGF.Builder.CreateFPExt(
+ Result.second,
+ CGF.ConvertType(E->getType()->castAs<ComplexType>()->getElementType()));
+ return ComplexPairTy(Resultr, Resulti);
+}
+
+ComplexPairTy CodeGenFunction::EmitPromotedComplexExpr(const Expr *E,
+ QualType DstTy) {
+ return ComplexExprEmitter(*this).EmitPromoted(E, DstTy);
+}
+
ComplexExprEmitter::BinOpInfo
-ComplexExprEmitter::EmitBinOps(const BinaryOperator *E) {
+ComplexExprEmitter::EmitBinOps(const BinaryOperator *E,
+ QualType PromotionType) {
TestAndClearIgnoreReal();
TestAndClearIgnoreImag();
BinOpInfo Ops;
- if (E->getLHS()->getType()->isRealFloatingType())
- Ops.LHS = ComplexPairTy(CGF.EmitScalarExpr(E->getLHS()), nullptr);
- else
- Ops.LHS = Visit(E->getLHS());
- if (E->getRHS()->getType()->isRealFloatingType())
- Ops.RHS = ComplexPairTy(CGF.EmitScalarExpr(E->getRHS()), nullptr);
- else
- Ops.RHS = Visit(E->getRHS());
- Ops.Ty = E->getType();
+ if (E->getLHS()->getType()->isRealFloatingType()) {
+ if (!PromotionType.isNull())
+ Ops.LHS = ComplexPairTy(
+ CGF.EmitPromotedScalarExpr(
+ E->getLHS(),
+ PromotionType->castAs<ComplexType>()->getElementType()),
+ nullptr);
+ else
+ Ops.LHS = ComplexPairTy(CGF.EmitScalarExpr(E->getLHS()), nullptr);
+ } else {
+ if (!PromotionType.isNull())
+ Ops.LHS = ComplexPairTy(
+ CGF.EmitPromotedComplexExpr(E->getLHS(), PromotionType));
+ else
+ Ops.LHS = Visit(E->getLHS());
+ }
+ if (E->getRHS()->getType()->isRealFloatingType()) {
+ if (!PromotionType.isNull())
+ Ops.RHS = ComplexPairTy(
+ CGF.EmitPromotedScalarExpr(
+ E->getRHS(),
+ PromotionType->castAs<ComplexType>()->getElementType()),
+ nullptr);
+ else
+ Ops.RHS = ComplexPairTy(CGF.EmitScalarExpr(E->getRHS()), nullptr);
+ } else {
+ if (!PromotionType.isNull())
+ Ops.RHS = ComplexPairTy(
+ CGF.EmitPromotedComplexExpr(E->getRHS(), PromotionType));
+ else
+ Ops.RHS = Visit(E->getRHS());
+ }
+ if (!PromotionType.isNull())
+ Ops.Ty = PromotionType;
+ else
+ Ops.Ty = E->getType();
return Ops;
}
-
LValue ComplexExprEmitter::
EmitCompoundAssignLValue(const CompoundAssignOperator *E,
ComplexPairTy (ComplexExprEmitter::*Func)(const BinOpInfo&),
Index: clang/lib/Basic/Targets/X86.h
===================================================================
--- clang/lib/Basic/Targets/X86.h
+++ clang/lib/Basic/Targets/X86.h
@@ -286,6 +286,10 @@
return false;
}
+ bool shouldEmitFloat16WithExcessPrecision() const {
+ return HasFloat16 && !hasLegalHalfType();
+ }
+
void getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const override;
Index: clang/lib/Basic/Targets/X86.cpp
===================================================================
--- clang/lib/Basic/Targets/X86.cpp
+++ clang/lib/Basic/Targets/X86.cpp
@@ -239,6 +239,7 @@
HasAVX512ER = true;
} else if (Feature == "+avx512fp16") {
HasAVX512FP16 = true;
+ HasLegalHalfType = true;
} else if (Feature == "+avx512pf") {
HasAVX512PF = true;
} else if (Feature == "+avx512dq") {
@@ -371,6 +372,8 @@
.Default(NoXOP);
XOPLevel = std::max(XOPLevel, XLevel);
}
+  // Turn on _Float16 for x86 when SSE2 is available.
+ HasFloat16 = SSELevel >= SSE2;
// LLVM doesn't have a separate switch for fpmath, so only accept it if it
// matches the selected sse level.
Index: clang/include/clang/Basic/TargetInfo.h
===================================================================
--- clang/include/clang/Basic/TargetInfo.h
+++ clang/include/clang/Basic/TargetInfo.h
@@ -909,6 +909,8 @@
return true;
}
+ virtual bool shouldEmitFloat16WithExcessPrecision() const { return false; }
+
/// Specify if mangling based on address space map should be used or
/// not for language specific address spaces
bool useAddressSpaceMapMangling() const {
Index: clang/docs/ReleaseNotes.rst
===================================================================
--- clang/docs/ReleaseNotes.rst
+++ clang/docs/ReleaseNotes.rst
@@ -527,6 +527,8 @@
handled incorrectly by some software (e.g. new failures with incorrect
assertions).
+- Support for the ``_Float16`` type has been added.
+
Arm and AArch64 Support in Clang
--------------------------------
Index: clang/docs/LanguageExtensions.rst
===================================================================
--- clang/docs/LanguageExtensions.rst
+++ clang/docs/LanguageExtensions.rst
@@ -749,7 +749,11 @@
includes all 64-bit and all recent 32-bit processors. When the target supports
AVX512-FP16, ``_Float16`` arithmetic is performed using that native support.
Otherwise, ``_Float16`` arithmetic is performed by promoting to ``float``,
-performing the operation, and then truncating to ``_Float16``.
+performing the operation, and then truncating to ``_Float16``. When doing this
+emulation, Clang defaults to following the C standard's rules for excess
+precision arithmetic, which avoids intermediate truncations within statements
+and may generate different results from a strict operation-by-operation
+emulation.
``_Float16`` will be supported on more targets as they define ABIs for it.
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits