zahiraam updated this revision to Diff 441167. zahiraam marked 2 inline comments as done.
CHANGES SINCE LAST ACTION https://reviews.llvm.org/D113107/new/ https://reviews.llvm.org/D113107 Files: clang/docs/LanguageExtensions.rst clang/docs/ReleaseNotes.rst clang/include/clang/Basic/TargetInfo.h clang/lib/Basic/Targets/X86.cpp clang/lib/Basic/Targets/X86.h clang/lib/CodeGen/CGExprComplex.cpp clang/lib/CodeGen/CGExprScalar.cpp clang/lib/CodeGen/CodeGenFunction.h clang/test/CodeGen/X86/Float16-arithmetic.c clang/test/CodeGen/X86/Float16-complex.c clang/test/Sema/Float16.c clang/test/SemaCXX/Float16.cpp
Index: clang/test/SemaCXX/Float16.cpp =================================================================== --- clang/test/SemaCXX/Float16.cpp +++ clang/test/SemaCXX/Float16.cpp @@ -1,20 +1,10 @@ -// RUN: %clang_cc1 -fsyntax-only -verify -triple i686-linux-pc %s -// RUN: %clang_cc1 -fsyntax-only -verify -triple i686-linux-pc -target-feature +sse2 %s -DHAVE -// RUN: %clang_cc1 -fsyntax-only -verify -triple x86_64-linux-pc %s -DHAVE -// RUN: %clang_cc1 -fsyntax-only -verify -triple spir-unknown-unknown %s -DHAVE -// RUN: %clang_cc1 -fsyntax-only -verify -triple armv7a-linux-gnu %s -DHAVE -// RUN: %clang_cc1 -fsyntax-only -verify -triple aarch64-linux-gnu %s -DHAVE +// RUN: %clang_cc1 -fsyntax-only -verify -triple x86_64-linux-pc %s +// RUN: %clang_cc1 -fsyntax-only -verify -triple spir-unknown-unknown %s +// RUN: %clang_cc1 -fsyntax-only -verify -triple armv7a-linux-gnu %s +// RUN: %clang_cc1 -fsyntax-only -verify -triple aarch64-linux-gnu %s -#ifdef HAVE // expected-no-diagnostics -#endif // HAVE -#ifndef HAVE -// expected-error@+2{{_Float16 is not supported on this target}} -#endif // !HAVE _Float16 f; -#ifndef HAVE -// expected-error@+2{{invalid suffix 'F16' on floating constant}} -#endif // !HAVE const auto g = 1.1F16; Index: clang/test/Sema/Float16.c =================================================================== --- clang/test/Sema/Float16.c +++ clang/test/Sema/Float16.c @@ -1,19 +1,23 @@ +<<<<<<< HEAD // RUN: %clang_cc1 -fsyntax-only -verify -triple i686-linux-pc %s // RUN: %clang_cc1 -fsyntax-only -verify -triple i686-linux-pc -target-feature +sse2 %s -DHAVE // RUN: %clang_cc1 -fsyntax-only -verify -triple x86_64-linux-pc %s -DHAVE // RUN: %clang_cc1 -fsyntax-only -verify -triple spir-unknown-unknown %s -DHAVE // RUN: %clang_cc1 -fsyntax-only -verify -triple armv7a-linux-gnu %s -DHAVE // RUN: %clang_cc1 -fsyntax-only -verify -triple aarch64-linux-gnu %s -DHAVE +======= +// RUN: %clang_cc1 -fsyntax-only -verify -triple x86_64-linux-pc %s +// RUN: %clang_cc1 
-fsyntax-only -verify -triple x86_64-linux-pc -target-feature +avx512fp16 %s +// RUN: %clang_cc1 -fsyntax-only -verify -triple spir-unknown-unknown %s +// RUN: %clang_cc1 -fsyntax-only -verify -triple armv7a-linux-gnu %s +// RUN: %clang_cc1 -fsyntax-only -verify -triple aarch64-linux-gnu %s +// RUN: %clang_cc1 -fsyntax-only -verify -triple x86_64-linux-pc -target-feature +avx512fp16 %s +// RUN: %clang_cc1 -fsyntax-only -verify -triple spir-unknown-unknown %s +// RUN: %clang_cc1 -fsyntax-only -verify -triple armv7a-linux-gnu %s +// RUN: %clang_cc1 -fsyntax-only -verify -triple aarch64-linux-gnu %s -#ifndef HAVE -// expected-error@+2{{_Float16 is not supported on this target}} -#endif // HAVE -_Float16 f; - -#ifdef HAVE _Complex _Float16 a; void builtin_complex(void) { _Float16 a = 0; (void)__builtin_complex(a, a); // expected-error {{'_Complex _Float16' is invalid}} } -#endif Index: clang/test/CodeGen/X86/Float16-complex.c =================================================================== --- clang/test/CodeGen/X86/Float16-complex.c +++ clang/test/CodeGen/X86/Float16-complex.c @@ -1,134 +1,407 @@ -// RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-unknown-unknown -target-feature +avx512fp16 -o - | FileCheck %s --check-prefix=X86 -// RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-unknown-unknown -o - | FileCheck %s --check-prefix=X86 +// RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-unknown-unknown -target-feature +avx512fp16 -o - | FileCheck %s --check-prefixes=CHECK,AVX +// RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-unknown-unknown -o - | FileCheck %s --check-prefixes=CHECK,X86 _Float16 _Complex add_half_rr(_Float16 a, _Float16 b) { - // X86-LABEL: @add_half_rr( - // X86: fadd - // X86-NOT: fadd - // X86: ret + // CHECK-LABEL: @add_half_rr( + // CHECK: [[A:%.*]] = alloca half + // CHECK-NEXT: [[B:%.*]] = alloca half + // CHECK: [[A_LOAD:%.*]] = load half, ptr [[A]] + + // AVX-NEXT: [[B_LOAD:%.*]] = load half, ptr [[B]] + // AVX-NEXT: [[AB_ADD:%.*]] = 
fadd half [[A_LOAD]], [[B_LOAD]] + // AVX: store half [[AB_ADD]], {{.*}} + + // X86-NEXT: [[A_EXT:%.*]] = fpext half [[A_LOAD]] to float + // X86-NEXT: [[B_LOAD:%.*]] = load half, ptr [[B]] + // X86-NEXT: [[B_EXT:%.*]] = fpext half [[B_LOAD]] to float + // X86-NEXT: [[AB_ADD:%.*]] = fadd float [[A_EXT]], [[B_EXT]] + // X86-NEXT: [[AB_ADD_TRUNC:%.*]] = fptrunc float [[AB_ADD]] to half + // X86: store half [[AB_ADD_TRUNC]], {{.*}} return a + b; } + _Float16 _Complex add_half_cr(_Float16 _Complex a, _Float16 b) { - // X86-LABEL: @add_half_cr( - // X86: fadd - // X86-NOT: fadd - // X86: ret + // CHECK-LABEL: @add_half_cr( + // CHECK: [[B:%.*]] = alloca half + // CHECK: [[AR:%.*]] = load half, ptr {{.*}} + // CHECK: [[AI:%.*]] = load half, ptr {{.*}} + + // AVX: [[A_LOAD:%.*]] = load half, ptr [[B]] + // AVX: [[AB_ADD:%.*]] = fadd half [[AR]], [[A_LOAD]] + // AVX: store half [[AB_ADD]], ptr {{.*}} + // AVX: store half [[AI]], ptr {{.*}} + + // X86-NEXT: [[AR_EXT:%.*]] = fpext half [[AR]] to float + // X86-NEXT: [[AI_EXT:%.*]] = fpext half [[AI]] to float + // X86-NEXT: [[B_LOAD:%.*]] = load half, ptr [[B]] + // X86-NEXT: [[B_EXT:%.*]] = fpext half [[B_LOAD]] to float + // X86-NEXT: [[AB_ADD:%.*]] = fadd float [[AR_EXT]], [[B_EXT]] + // X86: [[AB_ADD_TRUNC:%.*]] = fptrunc float [[AB_ADD]] to half + // X86-NEXT: [[AI_TRUNC:%.*]] = fptrunc float [[AI_EXT]] to half + // X86: store half [[AB_ADD_TRUNC]], ptr {{.*}} + // X86-NEXT: store half [[AI_TRUNC]], ptr {{.*}} return a + b; } + _Float16 _Complex add_half_rc(_Float16 a, _Float16 _Complex b) { - // X86-LABEL: @add_half_rc( - // X86: fadd - // X86-NOT: fadd - // X86: ret + // CHECK-LABEL: @add_half_rc( + + // CHECK: [[A:%.*]] = alloca half + // CHECK: store half %a, ptr [[A]] + // CHECK-NEXT: [[A_LOAD:%.*]] = load half, ptr [[A]] + + // AVX: [[BR_LOAD:%.*]] = load half, ptr {{.*}} + // AVX: [[BI_LOAD:%.*]] = load half, ptr {{.*}} + // AVX: [[AB_ADD:%.*]] = fadd half [[A_LOAD]], [[BR_LOAD]] + // AVX: store half [[AB_ADD]], 
ptr {{.*}} + // AVX-NEXT: store half [[BI_LOAD]], ptr {{.*}} + + // X86: [[A_EXT:%.*]] = fpext half [[A_LOAD]] to float + // X86: [[BR_LOAD:%.*]] = load half, ptr {{.*}} + // X86: [[BI_LOAD:%.*]] = load half, ptr {{.*}} + // X86-NEXT: [[BR_EXT:%.*]] = fpext half [[BR_LOAD]] to float + // X86-NEXT: [[BI_EXT:%.*]] = fpext half [[BI_LOAD]] to float + // X86-NEXT: [[AB_ADD:%.*]] = fadd float [[A_EXT]], [[BR_EXT]] + // X86-NEXT: [[AB_ADD_TRUNC:%.*]] = fptrunc float [[AB_ADD]] to half + // X86-NEXT: [[BI_TRUNC:%.*]] = fptrunc float [[BI_EXT]] to half + // X86: store half [[AB_ADD_TRUNC]], ptr {{.*}} + // X86-NEXT: store half [[BI_TRUNC]], ptr {{.*}} return a + b; } + _Float16 _Complex add_half_cc(_Float16 _Complex a, _Float16 _Complex b) { - // X86-LABEL: @add_half_cc( - // X86: fadd - // X86: fadd - // X86-NOT: fadd - // X86: ret + // CHECK-LABEL: @add_half_cc( + + // CHECK: [[AR_LOAD:%.*]] = load half, ptr {{.*}} + // CHECK: [[AI_LOAD:%.*]] = load half, ptr {{.*}} + + // AVX: [[BR_LOAD:%.*]] = load half, ptr {{.*}} + // AVX: [[BI_LOAD:%.*]] = load half, ptr {{.*}} + // AVX-NEXT: [[AB_ADDR:%.*]] = fadd half [[AR_LOAD]], [[BR_LOAD]] + // AVX-NEXT: [[AB_ADDI:%.*]] = fadd half [[AI_LOAD]], [[BI_LOAD]] + // AVX: store half [[AB_ADDR]], ptr {{.*}} + // AVX-NEXT: store half [[AB_ADDI]], ptr {{.*}} + + // X86: [[AR_LOAD_EXT:%.*]] = fpext half [[AR_LOAD]] to float + // X86: [[AI_LOAD_EXT:%.*]] = fpext half [[AI_LOAD]] to float + // X86: [[BR_LOAD:%.*]] = load half, ptr {{.*}} + // X86: [[BI_LOAD:%.*]] = load half, ptr {{.*}} + // X86-NEXT: [[BR_EXT:%.*]] = fpext half [[BR_LOAD]] to float + // X86-NEXT: [[BI_EXT:%.*]] = fpext half [[BI_LOAD]] to float + // X86-NEXT: [[AB_ADDR:%.*]] = fadd float [[AR_LOAD_EXT]], [[BR_EXT]] + // X86-NEXT: [[AB_ADDI:%.*]] = fadd float [[AI_LOAD_EXT]], [[BI_EXT]] + // X86-NEXT: [[AB_ADDR_TRUNC:%.*]] = fptrunc float [[AB_ADDR]] to half + // X86-NEXT: [[AB_ADDI_TRUNC:%.*]] = fptrunc float [[AB_ADDI]] to half + // X86: store half [[AB_ADDR_TRUNC]], ptr 
{{.*}} + // X86-NEXT: store half [[AB_ADDI_TRUNC]], ptr {{.*}} return a + b; } _Float16 _Complex sub_half_rr(_Float16 a, _Float16 b) { - // X86-LABEL: @sub_half_rr( - // X86: fsub - // X86-NOT: fsub - // X86: ret + // CHECK-LABEL: @sub_half_rr( + // CHECK: [[A:%.*]] = alloca half + // CHECK-NEXT: [[B:%.*]] = alloca half + // CHECK: [[A_LOAD:%.*]] = load half, ptr [[A]] + + // AVX-NEXT: [[B_LOAD:%.*]] = load half, ptr [[B]] + // AVX-NEXT: [[AB_SUB:%.*]] = fsub half [[A_LOAD]], [[B_LOAD]] + // AVX: store half [[AB_SUB]], {{.*}} + // AVX-NEXT: store half 0xH0000, {{.*}} + + // X86-NEXT: [[A_EXT:%.*]] = fpext half [[A_LOAD]] to float + // X86-NEXT: [[B_LOAD:%.*]] = load half, ptr [[B]] + // X86-NEXT: [[B_EXT:%.*]] = fpext half [[B_LOAD]] to float + // X86-NEXT: [[AB_SUB:%.*]] = fsub float [[A_EXT]], [[B_EXT]] + // X86-NEXT: [[AB_SUB_TRUNC:%.*]] = fptrunc float [[AB_SUB]] to half + // X86: store half [[AB_SUB_TRUNC]], {{.*}} + // X86-NEXT: store half 0xH0000, {{.*}} return a - b; } + _Float16 _Complex sub_half_cr(_Float16 _Complex a, _Float16 b) { - // X86-LABEL: @sub_half_cr( - // X86: fsub - // X86-NOT: fsub - // X86: ret + // CHECK-LABEL: @sub_half_cr( + // CHECK: [[B:%.*]] = alloca half + // CHECK: [[AR:%.*]] = load half, ptr {{.*}} + // CHECK: [[AI:%.*]] = load half, ptr {{.*}} + + // AVX: [[A_LOAD:%.*]] = load half, ptr [[B]] + // AVX: [[AB_SUB:%.*]] = fsub half [[AR]], [[A_LOAD]] + // AVX: store half [[AB_SUB]], ptr {{.*}} + // AVX: store half [[AI]], ptr {{.*}} + + // X86-NEXT: [[AR_EXT:%.*]] = fpext half [[AR]] to float + // X86-NEXT: [[AI_EXT:%.*]] = fpext half [[AI]] to float + // X86-NEXT: [[B_LOAD:%.*]] = load half, ptr [[B]] + // X86-NEXT: [[B_EXT:%.*]] = fpext half [[B_LOAD]] to float + // X86-NEXT: [[AB_SUB:%.*]] = fsub float [[AR_EXT]], [[B_EXT]] + // X86-NEXT: fptrunc float [[AB_SUB]] to half + // X86-NEXT: fptrunc float [[AI_EXT]] to half return a - b; } + _Float16 _Complex sub_half_rc(_Float16 a, _Float16 _Complex b) { - // X86-LABEL: @sub_half_rc( - 
// X86: fsub - // X86: fneg - // X86-NOT: fsub - // X86: ret + // CHECK-LABEL: @sub_half_rc( + + // CHECK: [[A:%.*]] = alloca half + // CHECK: store half {{.*}}, ptr [[A]] + // CHECK-NEXT: [[A_LOAD:%.*]] = load half, ptr [[A]] + + // AVX: [[BR_LOAD:%.*]] = load half, ptr {{.*}} + // AVX: [[BI_LOAD:%.*]] = load half, ptr {{.*}} + // AVX: [[AB_SUBR:%.*]] = fsub half [[A_LOAD]], [[BR_LOAD]] + // AVX: [[AB_SUBI:%.*]] = fneg half {{.*}} + // AVX: store half [[AB_SUBR]], ptr {{.*}} + // AVX-NEXT: store half [[AB_SUBI]], ptr {{.*}} + + // X86: [[A_EXT:%.*]] = fpext half [[A_LOAD]] to float + // X86: [[BR_LOAD:%.*]] = load half, ptr {{.*}} + // X86: [[BI_LOAD:%.*]] = load half, ptr {{.*}} + // X86-NEXT: [[BR_EXT:%.*]] = fpext half [[BR_LOAD]] to float + // X86-NEXT: [[BI_EXT:%.*]] = fpext half [[BI_LOAD]] to float + // X86-NEXT: [[AB_SUBR:%.*]] = fsub float [[A_EXT]], [[BR_EXT]] + // X86-NEXT: [[AB_SUBI:%.*]] = fneg float [[BI_EXT]] + // X86-NEXT: [[AB_SUBR_TRUNC:%.*]] = fptrunc float [[AB_SUBR]] to half + // X86-NEXT: [[AB_SUBI_TRUNC:%.*]] = fptrunc float [[AB_SUBI]] to half + // X86: store half [[AB_SUBR_TRUNC]], ptr {{.*}} + // X86-NEXT: store half [[AB_SUBI_TRUNC]], ptr {{.*}} return a - b; } + _Float16 _Complex sub_half_cc(_Float16 _Complex a, _Float16 _Complex b) { - // X86-LABEL: @sub_half_cc( - // X86: fsub - // X86: fsub - // X86-NOT: fsub - // X86: ret + // CHECK-LABEL: @sub_half_cc( + + // CHECK: [[AR_LOAD:%.*]] = load half, ptr {{.*}} + // CHECK: [[AI_LOAD:%.*]] = load half, ptr {{.*}} + + // AVX: [[BR_LOAD:%.*]] = load half, ptr {{.*}} + // AVX: [[BI_LOAD:%.*]] = load half, ptr {{.*}} + // AVX-NEXT: [[AB_SUBR:%.*]] = fsub half [[AR_LOAD]], [[BR_LOAD]] + // AVX-NEXT: [[AB_SUBI:%.*]] = fsub half [[AI_LOAD]], [[BI_LOAD]] + // AVX: store half [[AB_SUBR]], ptr {{.*}} + // AVX-NEXT: store half [[AB_SUBI]], ptr {{.*}} + + // X86: [[AR_LOAD_EXT:%.*]] = fpext half [[AR_LOAD]] to float + // X86: [[AI_LOAD_EXT:%.*]] = fpext half [[AI_LOAD]] to float + // X86: 
[[BR_LOAD:%.*]] = load half, ptr {{.*}} + // X86: [[BI_LOAD:%.*]] = load half, ptr {{.*}} + // X86-NEXT: [[BR_EXT:%.*]] = fpext half [[BR_LOAD]] to float + // X86-NEXT: [[BI_EXT:%.*]] = fpext half [[BI_LOAD]] to float + // X86-NEXT: [[AB_SUBR:%.*]] = fsub float [[AR_LOAD_EXT]], [[BR_EXT]] + // X86-NEXT: [[AB_SUBI:%.*]] = fsub float [[AI_LOAD_EXT]], [[BI_EXT]] + // X86-NEXT: [[AB_SUBR_TRUNC:%.*]] = fptrunc float [[AB_SUBR]] to half + // X86-NEXT: [[AB_SUBI_TRUNC:%.*]] = fptrunc float [[AB_SUBI]] to half + // X86: store half [[AB_SUBR_TRUNC]], ptr {{.*}} + // X86-NEXT: store half [[AB_SUBI_TRUNC]], ptr {{.*}} return a - b; } _Float16 _Complex mul_half_rr(_Float16 a, _Float16 b) { - // X86-LABEL: @mul_half_rr( - // X86: fmul - // X86-NOT: fmul - // X86: ret + // CHECK-LABEL: @mul_half_rr( + // CHECK: [[A:%.*]] = alloca half + // CHECK-NEXT: [[B:%.*]] = alloca half + // CHECK: [[A_LOAD:%.*]] = load half, ptr [[A]] + + // AVX-NEXT: [[B_LOAD:%.*]] = load half, ptr [[B]] + // AVX-NEXT: [[AB_MUL:%.*]] = fmul half [[A_LOAD]], [[B_LOAD]] + // AVX: store half [[AB_MUL]], {{.*}} + // AVX-NEXT: store half 0xH0000, {{.*}} + + // X86-NEXT: [[A_EXT:%.*]] = fpext half [[A_LOAD]] to float + // X86-NEXT: [[B_LOAD:%.*]] = load half, ptr [[B]] + // X86-NEXT: [[B_EXT:%.*]] = fpext half [[B_LOAD]] to float + // X86-NEXT: [[AB_MUL:%.*]] = fmul float [[A_EXT]], [[B_EXT]] + // X86-NEXT: [[AB_MUL_TRUNC:%.*]] = fptrunc float [[AB_MUL]] to half + // X86: store half [[AB_MUL_TRUNC]], {{.*}} + // X86-NEXT: store half 0xH0000, {{.*}} return a * b; } + _Float16 _Complex mul_half_cr(_Float16 _Complex a, _Float16 b) { // X86-LABEL: @mul_half_cr( - // X86: fmul - // X86: fmul - // X86-NOT: fmul - // X86: ret + // CHECK: [[B:%.*]] = alloca half + // CHECK: [[AR:%.*]] = load half, ptr {{.*}} + // CHECK: [[AI:%.*]] = load half, ptr {{.*}} + + // AVX: [[A_LOAD:%.*]] = load half, ptr [[B]] + // AVX: [[AB_MULR:%.*]] = fmul half [[AR]], [[A_LOAD]] + // AVX: [[AB_MULI:%.*]] = fmul half [[AI]], [[A_LOAD]] + 
// AVX: store half [[AB_MULR]], ptr {{.*}} + // AVX: store half [[AB_MULI]], ptr {{.*}} + + // X86-NEXT: [[AR_EXT:%.*]] = fpext half [[AR]] to float + // X86-NEXT: [[AI_EXT:%.*]] = fpext half [[AI]] to float + // X86-NEXT: [[B_LOAD:%.*]] = load half, ptr [[B]] + // X86-NEXT: [[B_EXT:%.*]] = fpext half [[B_LOAD]] to float + // X86-NEXT: [[AB_MULR:%.*]] = fmul float [[AR_EXT]], [[B_EXT]] + // X86-NEXT: [[AB_MULI:%.*]] = fmul float [[AI_EXT]], [[B_EXT]] + // X86-NEXT: [[AB_MULR_TRUNC:%.*]] = fptrunc float [[AB_MULR]] to half + // X86-NEXT: [[AB_MULI_TRUNC:%.*]] = fptrunc float [[AB_MULI]] to half + // X86: store half [[AB_MULR_TRUNC]], ptr {{.*}} + // X86-NEXT: store half [[AB_MULI_TRUNC]], ptr {{.*}} return a * b; } + _Float16 _Complex mul_half_rc(_Float16 a, _Float16 _Complex b) { - // X86-LABEL: @mul_half_rc( - // X86: fmul - // X86: fmul - // X86-NOT: fmul - // X86: ret + // CHECK-LABEL: @mul_half_rc( + // CHECK: [[A:%.*]] = alloca half + // CHECK: store half %a, ptr [[A]] + // CHECK-NEXT: [[A_LOAD:%.*]] = load half, ptr [[A]] + + // AVX: [[BR_LOAD:%.*]] = load half, ptr {{.*}} + // AVX: [[BI_LOAD:%.*]] = load half, ptr {{.*}} + // AVX: [[AB_MULR:%.*]] = fmul half [[A_LOAD]], [[BR_LOAD]] + // AVX: [[AB_MULI:%.*]] = fmul half [[A_LOAD]], [[BI_LOAD]] + // AVX: store half [[AB_MULR]], ptr {{.*}} + // AVX-NEXT: store half [[AB_MULI]], ptr {{.*}} + + // X86: [[A_EXT:%.*]] = fpext half [[A_LOAD]] to float + // X86: [[BR_LOAD:%.*]] = load half, ptr {{.*}} + // X86: [[BI_LOAD:%.*]] = load half, ptr {{.*}} + // X86-NEXT: [[BR_EXT:%.*]] = fpext half [[BR_LOAD]] to float + // X86-NEXT: [[BI_EXT:%.*]] = fpext half [[BI_LOAD]] to float + // X86-NEXT: [[AB_MULR:%.*]] = fmul float [[A_EXT]], [[BR_EXT]] + // X86-NEXT: [[AB_MULI:%.*]] = fmul float [[A_EXT]], [[BI_EXT]] + // X86-NEXT: [[AB_MULR_TRUNC:%.*]] = fptrunc float [[AB_MULR]] to half + // X86-NEXT: [[AB_MULI_TRUNC:%.*]] = fptrunc float [[AB_MULI]] to half + // X86: store half [[AB_MULR_TRUNC]], ptr {{.*}} + // X86-NEXT: 
store half [[AB_MULI_TRUNC]], ptr {{.*}} return a * b; } + _Float16 _Complex mul_half_cc(_Float16 _Complex a, _Float16 _Complex b) { - // X86-LABEL: @mul_half_cc( - // X86: %[[AC:[^ ]+]] = fmul - // X86: %[[BD:[^ ]+]] = fmul - // X86: %[[AD:[^ ]+]] = fmul - // X86: %[[BC:[^ ]+]] = fmul - // X86: %[[RR:[^ ]+]] = fsub half %[[AC]], %[[BD]] - // X86: %[[RI:[^ ]+]] = fadd half - // X86-DAG: %[[AD]] - // X86-DAG: , - // X86-DAG: %[[BC]] - // X86: fcmp uno half %[[RR]] - // X86: fcmp uno half %[[RI]] - // X86: call {{.*}} @__mulhc3( - // X86: ret + // CHECK: @mul_half_cc( + // CHECK: [[AR_LOAD:%.*]] = load half, ptr {{.*}} + // CHECK: [[AI_LOAD:%.*]] = load half, ptr {{.*}} + + // AVX: [[BR_LOAD:%.*]] = load half, ptr {{.*}} + // AVX: [[BI_LOAD:%.*]] = load half, ptr {{.*}} + // AVX-NEXT: [[AC:%.*]] = fmul half [[AR_LOAD]], [[BR_LOAD]] + // AVX-NEXT: [[BD:%.*]] = fmul half [[AI_LOAD]], [[BI_LOAD]] + // AVX-NEXT: [[AD:%.*]] = fmul half [[AR_LOAD]], [[BI_LOAD]] + // AVX-NEXT: [[BC:%.*]] = fmul half [[AI_LOAD]], [[BR_LOAD]] + // AVX: call <2 x half> @__mulhc3( + + // X86: [[AR_EXT:%.*]] = fpext half [[AR_LOAD]] to float + // X86: [[AI_EXT:%.*]] = fpext half [[AI_LOAD]] to float + // X86: [[BR_LOAD:%.*]] = load half, ptr {{.*}} + // X86: [[BI_LOAD:%.*]] = load half, ptr {{.*}} + // X86-NEXT: [[BR_EXT:%.*]] = fpext half [[BR_LOAD]] to float + // X86-NEXT: [[BI_EXT:%.*]] = fpext half [[BI_LOAD]] to float + // X86-NEXT: [[AC:%.*]] = fmul float [[AR_EXT]], [[BR_EXT]] + // X86-NEXT: [[BD:%.*]] = fmul float [[AI_EXT]], [[BI_EXT]] + // X86-NEXT: [[AD:%.*]] = fmul float [[AR_EXT]], [[BI_EXT]] + // X86-NEXT: [[BC:%.*]] = fmul float [[AI_EXT]], [[BR_EXT]] + // X86: call <2 x float> @__mulsc3( return a * b; } - _Float16 _Complex div_half_rr(_Float16 a, _Float16 b) { - // X86-LABEL: @div_half_rr( - // X86: fdiv - // X86-NOT: fdiv - // X86: ret + // CHECK: @div_half_rr( + + // CHECK: [[A:%.*]] = alloca half + // CHECK-NEXT: [[B:%.*]] = alloca half + // CHECK: [[A_LOAD:%.*]] = load half, 
ptr [[A]] + + // AVX-NEXT: [[B_LOAD:%.*]] = load half, ptr [[B]] + // AVX-NEXT: [[AB_DIV:%.*]] = fdiv half [[A_LOAD]], [[B_LOAD]] + // AVX: store half [[AB_DIV]], {{.*}} + + // X86-NEXT: [[A_EXT:%.*]] = fpext half [[A_LOAD]] to float + // X86-NEXT: [[B_LOAD:%.*]] = load half, ptr [[B]] + // X86-NEXT: [[B_EXT:%.*]] = fpext half [[B_LOAD]] to float + // X86-NEXT: [[AB_DIV:%.*]] = fdiv float [[A_EXT]], [[B_EXT]] + // X86-NEXT: [[AB_DIV_TRUNC:%.*]] = fptrunc float [[AB_DIV]] to half + // X86: store half [[AB_DIV_TRUNC]], {{.*}} return a / b; } + _Float16 _Complex div_half_cr(_Float16 _Complex a, _Float16 b) { - // X86-LABEL: @div_half_cr( - // X86: fdiv - // X86: fdiv - // X86-NOT: fdiv - // X86: ret + // CHECK-LABEL: @div_half_cr( + // CHECK: [[B:%.*]] = alloca half + // CHECK: [[AR:%.*]] = load half, ptr {{.*}} + // CHECK: [[AI:%.*]] = load half, ptr {{.*}} + + // AVX: [[A_LOAD:%.*]] = load half, ptr [[B]] + // AVX: [[AB_DIVR:%.*]] = fdiv half [[AR]], [[A_LOAD]] + // AVX: [[AB_DIVI:%.*]] = fdiv half [[AI]], [[A_LOAD]] + // AVX: store half [[AB_DIVR]], ptr {{.*}} + // AVX-NEXT: store half [[AB_DIVI]], ptr {{.*}} + + // X86-NEXT: [[AR_EXT:%.*]] = fpext half [[AR]] to float + // X86-NEXT: [[AI_EXT:%.*]] = fpext half [[AI]] to float + // X86-NEXT: [[B_LOAD:%.*]] = load half, ptr [[B]] + // X86-NEXT: [[B_EXT:%.*]] = fpext half [[B_LOAD]] to float + // X86-NEXT: [[AB_DIVR:%.*]] = fdiv float [[AR_EXT]], [[B_EXT]] + // X86-NEXT: [[AB_DIVI:%.*]] = fdiv float [[AI_EXT]], [[B_EXT]] + // X86-NEXT: [[AB_DIVR_TRUNC:%.*]] = fptrunc float [[AB_DIVR]] to half + // X86-NEXT: [[AB_DIVI_TRUNC:%.*]] = fptrunc float [[AB_DIVI]] to half + // X86: store half [[AB_DIVR_TRUNC]], ptr {{.*}} + // X86-NEXT: store half [[AB_DIVI_TRUNC]], ptr {{.*}} return a / b; } _Float16 _Complex div_half_rc(_Float16 a, _Float16 _Complex b) { - // X86-LABEL: @div_half_rc( - // X86-NOT: fdiv - // X86: call {{.*}} @__divhc3( - // X86: ret + // CHECK-LABEL: @div_half_rc( + // CHECK: [[A:%.*]] = alloca half + // 
CHECK: store half %a, ptr [[A]] + // CHECK-NEXT: [[A_LOAD:%.*]] = load half, ptr [[A]] + // AVX: [[BR_LOAD:%.*]] = load half, ptr {{.*}} + // AVX: [[BI_LOAD:%.*]] = load half, ptr {{.*}} + // AVX: call <2 x half> @__divhc3(half {{.*}} [[A_LOAD]], + // AVX-DAG: half {{.*}} [[BR_LOAD]], + // AVX-DAG: half {{.*}} [[BI_LOAD]]) + + // X86: [[A_EXT:%.*]] = fpext half {{.*}} to float + // X86: [[BR_LOAD:%.*]] = load half, ptr {{.*}} + // X86: [[BI_LOAD:%.*]] = load half, ptr {{.*}} + // X86-NEXT: [[BR_EXT:%.*]] = fpext half [[BR_LOAD]] to float + // X86-NEXT: [[BI_EXT:%.*]] = fpext half [[BI_LOAD]] to float + // X86-DAG: call <2 x float> @__divsc3(float {{.*}} [[A_EXT]], + // X86-DAG: float {{.*}} [[BR_EXT]], + // X86-DAG: float {{.*}} [[BI_EXT]]) return a / b; } + _Float16 _Complex div_half_cc(_Float16 _Complex a, _Float16 _Complex b) { - // X86-LABEL: @div_half_cc( - // X86-NOT: fdiv - // X86: call {{.*}} @__divhc3( - // X86: ret + // CHECK-LABEL: @div_half_cc( + + // CHECK: [[AR_LOAD:%.*]] = load half, ptr {{.*}} + // CHECK: [[AI_LOAD:%.*]] = load half, ptr {{.*}} + // AVX: [[BR_LOAD:%.*]] = load half, ptr {{.*}} + // AVX: [[BI_LOAD:%.*]] = load half, ptr {{.*}} + + // AVX: call <2 x half> @__divhc3(half {{.*}} [[AR_LOAD]], + // AVX-DAG: half {{.*}} [[AI_LOAD]], + // AVX-DAG: half {{.*}} [[BR_LOAD]], + // AVX-DAG: half {{.*}} [[BI_LOAD]]) + + // X86: [[AR_EXT:%.*]] = fpext half [[AR_LOAD]] to float + // X86-NEXT: [[AI_EXT:%.*]] = fpext half [[AI_LOAD]] to float + // X86: [[BR_LOAD:%.*]] = load half, ptr {{.*}} + // X86: [[BI_LOAD:%.*]] = load half, ptr {{.*}} + + // X86: [[BR_EXT:%.*]] = fpext half [[BR_LOAD]] to float + // X86-NEXT: [[BI_EXT:%.*]] = fpext half [[BI_LOAD]] to float + // X86: call <2 x float> @__divsc3(float {{.*}} [[AR_EXT]], + // X86-DAG: float {{.*}} [[AI_EXT]], + // X86-DAG: float {{.*}} [[BR_EXT]], + // X86-DAG: float {{.*}} [[BI_EXT]]) return a / b; } + +_Float16 _Complex addcompound_half_rrr(_Float16 a, _Float16 c) { + // CHECK-LABEL: 
@addcompound_half_rrr + + // AVX: [[A_LOAD:%.*]] = load half, ptr {{.*}} + // AVX-NEXT: [[C_LOAD:%.*]] = load half, ptr {{.*}} + // AVX-NEXT: [[AC_ADD:%.*]] = fadd half [[C_LOAD]], [[A_LOAD]] + // AVX-NEXT: store half [[AC_ADD]], ptr {{.*}} + + // X86: [[A_LOAD:%.*]] = load half, ptr {{.*}} + // X86-NEXT: [[A_EXT:%.*]] = fpext half %0 to float + // X86-NEXT: [[B_LOAD:%.*]] = load half, ptr {{.*}} + // X86-NEXT: [[B_EXT:%.*]] = fpext half [[B_LOAD]] to float + // X86-NEXT: [[AC_ADD:%.*]] = fadd float [[B_EXT]], [[A_EXT]] + // X86-NEXT: [[AC_ADD_TRUNC:%.*]] = fptrunc float [[AC_ADD]] to half + // X86-NEXT: store half [[AC_ADD_TRUNC]], ptr {{.*}} + c += a; + return c; +} Index: clang/test/CodeGen/X86/Float16-arithmetic.c =================================================================== --- clang/test/CodeGen/X86/Float16-arithmetic.c +++ clang/test/CodeGen/X86/Float16-arithmetic.c @@ -1,29 +1,121 @@ // RUN: %clang_cc1 -triple x86_64-unknown-unknown \ // RUN: -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK -// CHECK-NOT: fpext -// CHECK-NOT: fptrunc - _Float16 add1(_Float16 a, _Float16 b) { + // CHECK-LABEL: define {{.*}} half @add1 + // CHECK: [[A:%.*]] = alloca half + // CHECK-NEXT: [[B:%.*]] = alloca half + // CHECK: [[A_LOAD:%.*]] = load half, ptr [[A]] + // CHECK-NEXT: [[A_EXT:%.*]] = fpext half [[A_LOAD]] to float + // CHECK-NEXT: [[B_LOAD:%.*]] = load half, ptr [[B]] + // CHECK-NEXT: [[B_EXT:%.*]] = fpext half [[B_LOAD]] to float + // CHECK-NEXT: [[AB_ADD:%.*]] = fadd float [[A_EXT]], [[B_EXT]] + // CHECK-NEXT: [[AB_ADD_TRUNC:%.*]] = fptrunc float [[AB_ADD]] to half + // CHECK: ret half [[AB_ADD_TRUNC]] return a + b; } _Float16 add2(_Float16 a, _Float16 b, _Float16 c) { + // CHECK-LABEL: define {{.*}} half @add2 + // CHECK: [[A:%.*]] = alloca half + // CHECK-NEXT: [[B:%.*]] = alloca half + // CHECK: [[C:%.*]] = alloca half + // CHECK: [[A_LOAD:%.*]] = load half, ptr [[A]] + // CHECK-NEXT: [[A_EXT:%.*]] = fpext half [[A_LOAD]] to float + // CHECK: 
[[B_LOAD:%.*]] = load half, ptr [[B]] + // CHECK-NEXT: [[B_EXT:%.*]] = fpext half [[B_LOAD]] to float + // CHECK-NEXT: [[AB_ADD:%.*]] = fadd float [[A_EXT]], [[B_EXT]] + // CHECK-NEXT: [[C_LOAD:%.*]] = load half, ptr [[C]] + // CHECK-NEXT: [[C_EXT:%.*]] = fpext half [[C_LOAD]] to float + // CHECK-NEXT: [[ABC_ADD:%.*]] = fadd float [[AB_ADD]], [[C_EXT]] + // CHECK-NEXT: [[ABC_ADD_TRUNC:%.*]] = fptrunc float [[ABC_ADD]] to half + // CHECK: ret half [[ABC_ADD_TRUNC]] return a + b + c; } _Float16 div(_Float16 a, _Float16 b) { + // CHECK-LABEL: define dso_local half @div + // CHECK: [[A:%.*]] = alloca half + // CHECK-NEXT: [[B:%.*]] = alloca half + // CHECK: [[A_LOAD:%.*]] = load half, ptr [[A]] + // CHECK-NEXT: [[A_EXT:%.*]] = fpext half [[A_LOAD]] to float + // CHECK-NEXT: [[B_LOAD:%.*]] = load half, ptr [[B]] + // CHECK-NEXT: [[B_EXT:%.*]] = fpext half [[B_LOAD]] to float + // CHECK-NEXT: [[AB_DIV:%.*]] = fdiv float [[A_EXT]], [[B_EXT]] + // CHECK-NEXT: [[AB_DIV_TRUNC:%.*]] = fptrunc float [[AB_DIV]] to half + // CHECK: ret half [[AB_DIV_TRUNC]] return a / b; } _Float16 mul(_Float16 a, _Float16 b) { + // CHECK-LABEL: define dso_local half @mul + // CHECK: [[A:%.*]] = alloca half + // CHECK-NEXT: [[B:%.*]] = alloca half + // CHECK: [[A_LOAD:%.*]] = load half, ptr [[A]] + // CHECK-NEXT: [[A_EXT:%.*]] = fpext half [[A_LOAD]] to float + // CHECK-NEXT: [[B_LOAD:%.*]] = load half, ptr [[B]] + // CHECK-NEXT: [[B_EXT:%.*]] = fpext half [[B_LOAD]] to float + // CHECK-NEXT: [[AB_MUL:%.*]] = fmul float [[A_EXT]], [[B_EXT]] + // CHECK-NEXT: [[AB_MUL_TRUNC:%.*]] = fptrunc float [[AB_MUL]] to half + // CHECK: ret half [[AB_MUL_TRUNC]] return a * b; } _Float16 add_and_mul1(_Float16 a, _Float16 b, _Float16 c, _Float16 d) { + // CHECK-LABEL: define dso_local half @add_and_mul1 + // CHECK: [[A:%.*]] = alloca half + // CHECK-NEXT: [[B:%.*]] = alloca half + // CHECK-NEXT: [[C:%.*]] = alloca half + // CHECK: [[D:%.*]] = alloca half + // CHECK: [[A_LOAD:%.*]] = load half, ptr [[A]] + // 
CHECK-NEXT: [[A_EXT:%.*]] = fpext half [[A_LOAD]] to float + // CHECK-NEXT: [[B_LOAD:%.*]] = load half, ptr [[B]] + // CHECK-NEXT: [[B_EXT:%.*]] = fpext half [[B_LOAD]] to float + // CHECK-NEXT: [[AB_MUL:%.*]] = fmul float [[A_EXT]], [[B_EXT]] + // CHECK-NEXT: [[C_LOAD:%.*]] = load half, ptr [[C]] + // CHECK-NEXT: [[C_EXT:%.*]] = fpext half [[C_LOAD]] to float + // CHECK-NEXT: [[D_LOAD:%.*]] = load half, ptr [[D]] + // CHECK-NEXT: [[D_EXT:%.*]] = fpext half [[D_LOAD]] to float + // CHECK-NEXT: [[CD_MUL:%.*]] = fmul float [[C_EXT]], [[D_EXT]] + // CHECK-NEXT: [[ADD:%.*]] = fadd float [[AB_MUL]], [[CD_MUL]] + // CHECK-NEXT: [[ADD_TRUNC:%.*]] = fptrunc float [[ADD]] to half + // CHECK: ret half [[ADD_TRUNC]] return a * b + c * d; } _Float16 add_and_mul2(_Float16 a, _Float16 b, _Float16 c, _Float16 d) { + // CHECK-LABEL: define dso_local half @add_and_mul2 + // CHECK: [[A:%.*]] = alloca half + // CHECK-NEXT: [[B:%.*]] = alloca half + // CHECK-NEXT: [[C:%.*]] = alloca half + // CHECK: [[D:%.*]] = alloca half + // CHECK: [[A_LOAD:%.*]] = load half, ptr [[A]] + // CHECK-NEXT: [[A_EXT:%.*]] = fpext half [[A_LOAD]] to float + // CHECK-NEXT: [[B_LOAD:%.*]] = load half, ptr [[B]] + // CHECK-NEXT: [[B_EXT:%.*]] = fpext half [[B_LOAD]] to float + // CHECK-NEXT: [[MUL:%.*]] = fmul float 6.000000e+00, [[B_EXT]] + // CHECK-NEXT: [[SUB:%.*]] = fsub float [[A_EXT]], [[MUL]] + // CHECK-NEXT: [[SUB_TRUNC:%.*]] = fptrunc float [[SUB]] to half + // CHECK-NEXT: [[SUB_EXT:%.*]] = fpext half [[SUB_TRUNC]] to float + // CHECK-NEXT: [[C_LOAD:%.*]] = load half, ptr [[C]] + // CHECK-NEXT: [[C_EXT:%.*]] = fpext half [[C_LOAD]] to float + // CHECK-NEXT: [[ADD:%.*]] = fadd float [[SUB_EXT]], [[C_EXT]] + // CHECK-NEXT: [[ADD_TRUNC:%.*]] = fptrunc float [[ADD]] to half + // CHECK: ret half [[ADD_TRUNC]] return (a - 6 * b) + c; } + +_Float16 addcompound(_Float16 a, _Float16 c) { + // CHECK-LABEL: dso_local half @addcompound + // CHECK: [[A_LOAD:%.*]] = load half, ptr {{.*}} + // CHECK-NEXT: 
[[A_EXT:%.*]] = fpext half [[A_LOAD]] to float + // CHECK-NEXT: [[C_LOAD:%.*]] = load half, ptr {{.*}} + // CHECK-NEXT: [[C_EXT:%.*]] = fpext half [[C_LOAD]] to float + // CHECK-NEXT: [[AC_ADD:%.*]] = fadd float [[C_EXT]], [[A_EXT]] + // CHECK-NEXT: [[AC_ADD_TRUNC:%.*]] = fptrunc float [[AC_ADD]] to half + // CHECK-NEXT: store half [[AC_ADD_TRUNC]], ptr {{.*}} + // CHECK-NEXT: [[RES:%.*]] = load half, ptr {{.*}} + // CHECK: ret half [[RES]] + c += a; + return c; +} Index: clang/lib/CodeGen/CodeGenFunction.h =================================================================== --- clang/lib/CodeGen/CodeGenFunction.h +++ clang/lib/CodeGen/CodeGenFunction.h @@ -4403,6 +4403,9 @@ /// EmitLoadOfComplex - Load a complex number from the specified l-value. ComplexPairTy EmitLoadOfComplex(LValue src, SourceLocation loc); + ComplexPairTy EmitPromotedComplexExpr(const Expr *E, QualType DstTy); + llvm::Value *EmitPromotedScalarExpr(const Expr *E, QualType DstType); + Address emitAddrOfRealComponent(Address complex, QualType complexType); Address emitAddrOfImagComponent(Address complex, QualType complexType); Index: clang/lib/CodeGen/CGExprScalar.cpp =================================================================== --- clang/lib/CodeGen/CGExprScalar.cpp +++ clang/lib/CodeGen/CGExprScalar.cpp @@ -791,21 +791,47 @@ // Helper functions for fixed point binary operations. 
Value *EmitFixedPointBinOp(const BinOpInfo &Ops); - BinOpInfo EmitBinOps(const BinaryOperator *E); - LValue EmitCompoundAssignLValue(const CompoundAssignOperator *E, - Value *(ScalarExprEmitter::*F)(const BinOpInfo &), - Value *&Result); + BinOpInfo EmitBinOps(const BinaryOperator *E, + QualType PromotionTy = QualType()); + + Value *EmitPromoted(const Expr *E, QualType PromotionTy); + + LValue EmitCompoundAssignLValue( + const CompoundAssignOperator *E, QualType PromotionType, + Value *(ScalarExprEmitter::*F)(const BinOpInfo &), Value *&Result); Value *EmitCompoundAssign(const CompoundAssignOperator *E, + QualType PromotionTy, Value *(ScalarExprEmitter::*F)(const BinOpInfo &)); + QualType getPromotionType(const Expr *E) { + if (E->getType()->isFloat16Type()) { + if (CGF.getTarget().shouldEmitFloat16WithExcessPrecision()) + return CGF.getContext().FloatTy; + } + return QualType(); + } + // Binary operators and binary compound assignment operators. -#define HANDLEBINOP(OP) \ - Value *VisitBin ## OP(const BinaryOperator *E) { \ - return Emit ## OP(EmitBinOps(E)); \ - } \ - Value *VisitBin ## OP ## Assign(const CompoundAssignOperator *E) { \ - return EmitCompoundAssign(E, &ScalarExprEmitter::Emit ## OP); \ +#define HANDLEBINOP(OP) \ + Value *VisitBin##OP(const BinaryOperator *E) { \ + QualType promotionTy = getPromotionType(E); \ + auto result = Emit##OP(EmitBinOps(E, promotionTy)); \ + if (result) \ + if (!promotionTy.isNull()) \ + result = Builder.CreateFPTrunc(result, ConvertType(E->getType()), \ + "unpromotion"); \ + return result; \ + } \ + Value *VisitBin##OP##Assign(const CompoundAssignOperator *E) { \ + QualType promotionTy = getPromotionType(E); \ + auto result = \ + EmitCompoundAssign(E, promotionTy, &ScalarExprEmitter::Emit##OP); \ + if (result) \ + if (!promotionTy.isNull()) \ + result = Builder.CreateFPTrunc(result, ConvertType(E->getType()), \ + "unpromotion"); \ + return result; \ } HANDLEBINOP(Mul) HANDLEBINOP(Div) @@ -3050,12 +3076,45 @@ // Binary 
Operators //===----------------------------------------------------------------------===// -BinOpInfo ScalarExprEmitter::EmitBinOps(const BinaryOperator *E) { +Value *ScalarExprEmitter::EmitPromoted(const Expr *E, QualType PromotionType) { + if (auto BO = dyn_cast<BinaryOperator>(E)) { + switch (BO->getOpcode()) { +#define HANDLE_BINOP(OP) \ + case BO_##OP: \ + return Emit##OP(EmitBinOps(BO, PromotionType)); + HANDLE_BINOP(Add) + HANDLE_BINOP(Sub) + HANDLE_BINOP(Mul) + HANDLE_BINOP(Div) +#undef HANDLE_BINOP + default: + break; + } + } else { + auto result = Visit(const_cast<Expr *>(E)); + if (result) + return CGF.Builder.CreateFPExt(result, ConvertType(PromotionType), "ext"); + } + // fallback path + auto result = Visit(const_cast<Expr *>(E)); + if (result) + result = CGF.Builder.CreateFPExt(result, ConvertType(E->getType())); + return result; +} + +BinOpInfo ScalarExprEmitter::EmitBinOps(const BinaryOperator *E, + QualType PromotionType) { TestAndClearIgnoreResultAssign(); BinOpInfo Result; - Result.LHS = Visit(E->getLHS()); - Result.RHS = Visit(E->getRHS()); - Result.Ty = E->getType(); + if (!PromotionType.isNull()) { + Result.LHS = CGF.EmitPromotedScalarExpr(E->getLHS(), PromotionType); + Result.RHS = CGF.EmitPromotedScalarExpr(E->getRHS(), PromotionType); + Result.Ty = PromotionType; + } else { + Result.LHS = Visit(E->getLHS()); + Result.RHS = Visit(E->getRHS()); + Result.Ty = E->getType(); + } Result.Opcode = E->getOpcode(); Result.FPFeatures = E->getFPFeaturesInEffect(CGF.getLangOpts()); Result.E = E; @@ -3063,9 +3122,8 @@ } LValue ScalarExprEmitter::EmitCompoundAssignLValue( - const CompoundAssignOperator *E, - Value *(ScalarExprEmitter::*Func)(const BinOpInfo &), - Value *&Result) { + const CompoundAssignOperator *E, QualType PromotionType, + Value *(ScalarExprEmitter::*Func)(const BinOpInfo &), Value *&Result) { QualType LHSTy = E->getLHS()->getType(); BinOpInfo OpInfo; @@ -3074,8 +3132,13 @@ // Emit the RHS first. 
__block variables need to have the rhs evaluated // first, plus this should improve codegen a little. - OpInfo.RHS = Visit(E->getRHS()); - OpInfo.Ty = E->getComputationResultType(); + if (!PromotionType.isNull()) { + OpInfo.RHS = CGF.EmitPromotedScalarExpr(E->getRHS(), PromotionType); + OpInfo.Ty = PromotionType; + } else { + OpInfo.RHS = Visit(E->getRHS()); + OpInfo.Ty = E->getComputationResultType(); + } OpInfo.Opcode = E->getOpcode(); OpInfo.FPFeatures = E->getFPFeaturesInEffect(CGF.getLangOpts()); OpInfo.E = E; @@ -3094,7 +3157,8 @@ llvm::Instruction::BinaryOps Op; switch (OpInfo.Opcode) { // We don't have atomicrmw operands for *, %, /, <<, >> - case BO_MulAssign: case BO_DivAssign: + case BO_MulAssign: + case BO_DivAssign: case BO_RemAssign: case BO_ShlAssign: case BO_ShrAssign: @@ -3154,16 +3218,24 @@ CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, OpInfo.FPFeatures); SourceLocation Loc = E->getExprLoc(); - OpInfo.LHS = - EmitScalarConversion(OpInfo.LHS, LHSTy, E->getComputationLHSType(), Loc); + if (!PromotionType.isNull()) + OpInfo.LHS = + EmitScalarConversion(OpInfo.LHS, LHSTy, PromotionType, E->getExprLoc()); + else + OpInfo.LHS = EmitScalarConversion(OpInfo.LHS, LHSTy, + E->getComputationLHSType(), Loc); // Expand the binary operator. Result = (this->*Func)(OpInfo); // Convert the result back to the LHS type, // potentially with Implicit Conversion sanitizer check. 
- Result = EmitScalarConversion(Result, E->getComputationResultType(), LHSTy, - Loc, ScalarConversionOpts(CGF.SanOpts)); + if (!PromotionType.isNull()) + Result = EmitScalarConversion(Result, PromotionType, LHSTy, Loc, + ScalarConversionOpts(CGF.SanOpts)); + else + Result = EmitScalarConversion(Result, E->getComputationResultType(), LHSTy, + Loc, ScalarConversionOpts(CGF.SanOpts)); if (atomicPHI) { llvm::BasicBlock *curBlock = Builder.GetInsertBlock(); @@ -3193,11 +3265,12 @@ return LHSLV; } -Value *ScalarExprEmitter::EmitCompoundAssign(const CompoundAssignOperator *E, - Value *(ScalarExprEmitter::*Func)(const BinOpInfo &)) { +Value *ScalarExprEmitter::EmitCompoundAssign( + const CompoundAssignOperator *E, QualType PromotionTy, + Value *(ScalarExprEmitter::*Func)(const BinOpInfo &)) { bool Ignore = TestAndClearIgnoreResultAssign(); Value *RHS = nullptr; - LValue LHS = EmitCompoundAssignLValue(E, Func, RHS); + LValue LHS = EmitCompoundAssignLValue(E, PromotionTy, Func, RHS); // If the result is clearly ignored, return now. if (Ignore) @@ -4896,6 +4969,12 @@ .EmitComplexToScalarConversion(Src, SrcTy, DstTy, Loc); } +Value * +CodeGenFunction::EmitPromotedScalarExpr(const Expr *E, + QualType DstType) { + return ScalarExprEmitter(*this).EmitPromoted(E, DstType); +} + llvm::Value *CodeGenFunction:: EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, @@ -4930,8 +5009,8 @@ switch (E->getOpcode()) { #define COMPOUND_OP(Op) \ case BO_##Op##Assign: \ - return Scalar.EmitCompoundAssignLValue(E, &ScalarExprEmitter::Emit##Op, \ - Result) + return Scalar.EmitCompoundAssignLValue( \ + E, Scalar.getPromotionType(E), &ScalarExprEmitter::Emit##Op, Result) COMPOUND_OP(Mul); COMPOUND_OP(Div); COMPOUND_OP(Rem); Index: clang/lib/CodeGen/CGExprComplex.cpp =================================================================== --- clang/lib/CodeGen/CGExprComplex.cpp +++ clang/lib/CodeGen/CGExprComplex.cpp @@ -253,7 +253,9 @@ QualType Ty; // Computation Type. 
}; - BinOpInfo EmitBinOps(const BinaryOperator *E); + BinOpInfo EmitBinOps(const BinaryOperator *E, + QualType PromotionTy = QualType()); + ComplexPairTy EmitPromoted(const Expr *E, QualType PromotionTy); LValue EmitCompoundAssignLValue(const CompoundAssignOperator *E, ComplexPairTy (ComplexExprEmitter::*Func) (const BinOpInfo &), @@ -270,18 +272,43 @@ ComplexPairTy EmitComplexBinOpLibCall(StringRef LibCallName, const BinOpInfo &Op); - ComplexPairTy VisitBinAdd(const BinaryOperator *E) { - return EmitBinAdd(EmitBinOps(E)); - } - ComplexPairTy VisitBinSub(const BinaryOperator *E) { - return EmitBinSub(EmitBinOps(E)); - } - ComplexPairTy VisitBinMul(const BinaryOperator *E) { - return EmitBinMul(EmitBinOps(E)); - } - ComplexPairTy VisitBinDiv(const BinaryOperator *E) { - return EmitBinDiv(EmitBinOps(E)); - } + QualType getPromotionType(const Expr *E) { + assert(E->getType()->isAnyComplexType() && + "Expecting to promote a complex type!"); + QualType ElementType = + E->getType()->castAs<ComplexType>()->getElementType(); + if (ElementType->isFloat16Type()) + if (CGF.getTarget().shouldEmitFloat16WithExcessPrecision()) + return CGF.getContext().getComplexType(CGF.getContext().FloatTy); + return QualType(); + } + +#define HANDLEBINOP(OP) \ + ComplexPairTy VisitBin##OP(const BinaryOperator *E) { \ + QualType promotionTy = getPromotionType(E); \ + ComplexPairTy result = EmitBin##OP(EmitBinOps(E, promotionTy)); \ + if (!promotionTy.isNull()) { \ + if (result.first) \ + result.first = Builder.CreateFPTrunc( \ + result.first, \ + CGF.ConvertType( \ + E->getType()->castAs<ComplexType>()->getElementType()), \ + "unpromotion"); \ + if (result.second) \ + result.second = Builder.CreateFPTrunc( \ + result.second, \ + CGF.ConvertType( \ + E->getType()->castAs<ComplexType>()->getElementType()), \ + "unpromotion"); \ + } \ + return result; \ + } + + HANDLEBINOP(Mul) + HANDLEBINOP(Div) + HANDLEBINOP(Add) + HANDLEBINOP(Sub) +#undef HANDLEBINOP ComplexPairTy 
VisitCXXRewrittenBinaryOperator(CXXRewrittenBinaryOperator *E) { return Visit(E->getSemanticForm()); @@ -876,25 +903,95 @@ return ComplexPairTy(DSTr, DSTi); } +ComplexPairTy ComplexExprEmitter::EmitPromoted(const Expr *E, + QualType PromotionType) { + if (auto BO = dyn_cast<BinaryOperator>(E)) { + switch (BO->getOpcode()) { +#define HANDLE_BINOP(OP) \ + case BO_##OP: \ + return EmitBin##OP(EmitBinOps(BO, PromotionType)); + HANDLE_BINOP(Add) + HANDLE_BINOP(Sub) + HANDLE_BINOP(Mul) + HANDLE_BINOP(Div) +#undef HANDLE_BINOP + default: + break; + } + } else { + ComplexPairTy Result = Visit(const_cast<Expr *>(E)); + llvm::Value *Resultr = CGF.Builder.CreateFPExt( + Result.first, + CGF.ConvertType(PromotionType->castAs<ComplexType>()->getElementType()), + "ext"); + llvm::Value *Resulti = CGF.Builder.CreateFPExt( + Result.second, + CGF.ConvertType(PromotionType->castAs<ComplexType>()->getElementType()), + "ext"); + return ComplexPairTy(Resultr, Resulti); + } + // fallback path + ComplexPairTy Result = Visit(const_cast<Expr *>(E)); + llvm::Value *Resultr = CGF.Builder.CreateFPExt( + Result.first, + CGF.ConvertType(E->getType()->castAs<ComplexType>()->getElementType())); + llvm::Value *Resulti = CGF.Builder.CreateFPExt( + Result.second, + CGF.ConvertType(E->getType()->castAs<ComplexType>()->getElementType())); + return ComplexPairTy(Resultr, Resulti); +} + +ComplexPairTy CodeGenFunction::EmitPromotedComplexExpr(const Expr *E, + QualType DstTy) { + return ComplexExprEmitter(*this).EmitPromoted(E, DstTy); +} + ComplexExprEmitter::BinOpInfo -ComplexExprEmitter::EmitBinOps(const BinaryOperator *E) { +ComplexExprEmitter::EmitBinOps(const BinaryOperator *E, + QualType PromotionType) { TestAndClearIgnoreReal(); TestAndClearIgnoreImag(); BinOpInfo Ops; - if (E->getLHS()->getType()->isRealFloatingType()) - Ops.LHS = ComplexPairTy(CGF.EmitScalarExpr(E->getLHS()), nullptr); - else - Ops.LHS = Visit(E->getLHS()); - if (E->getRHS()->getType()->isRealFloatingType()) - Ops.RHS = 
ComplexPairTy(CGF.EmitScalarExpr(E->getRHS()), nullptr); - else - Ops.RHS = Visit(E->getRHS()); - Ops.Ty = E->getType(); + if (E->getLHS()->getType()->isRealFloatingType()) { + if (!PromotionType.isNull()) + Ops.LHS = ComplexPairTy( + CGF.EmitPromotedScalarExpr( + E->getLHS(), + PromotionType->castAs<ComplexType>()->getElementType()), + nullptr); + else + Ops.LHS = ComplexPairTy(CGF.EmitScalarExpr(E->getLHS()), nullptr); + } else { + if (!PromotionType.isNull()) + Ops.LHS = ComplexPairTy( + CGF.EmitPromotedComplexExpr(E->getLHS(), PromotionType)); + else + Ops.LHS = Visit(E->getLHS()); + } + if (E->getRHS()->getType()->isRealFloatingType()) { + if (!PromotionType.isNull()) + Ops.RHS = ComplexPairTy( + CGF.EmitPromotedScalarExpr( + E->getRHS(), + PromotionType->castAs<ComplexType>()->getElementType()), + nullptr); + else + Ops.RHS = ComplexPairTy(CGF.EmitScalarExpr(E->getRHS()), nullptr); + } else { + if (!PromotionType.isNull()) + Ops.RHS = ComplexPairTy( + CGF.EmitPromotedComplexExpr(E->getRHS(), PromotionType)); + else + Ops.RHS = Visit(E->getRHS()); + } + if (!PromotionType.isNull()) + Ops.Ty = PromotionType; + else + Ops.Ty = E->getType(); return Ops; } - LValue ComplexExprEmitter:: EmitCompoundAssignLValue(const CompoundAssignOperator *E, ComplexPairTy (ComplexExprEmitter::*Func)(const BinOpInfo&), Index: clang/lib/Basic/Targets/X86.h =================================================================== --- clang/lib/Basic/Targets/X86.h +++ clang/lib/Basic/Targets/X86.h @@ -286,6 +286,10 @@ return false; } + bool shouldEmitFloat16WithExcessPrecision() const { + return HasFloat16 && !hasLegalHalfType(); + } + void getTargetDefines(const LangOptions &Opts, MacroBuilder &Builder) const override; Index: clang/lib/Basic/Targets/X86.cpp =================================================================== --- clang/lib/Basic/Targets/X86.cpp +++ clang/lib/Basic/Targets/X86.cpp @@ -239,6 +239,7 @@ HasAVX512ER = true; } else if (Feature == "+avx512fp16") { HasAVX512FP16 = 
true; + HasLegalHalfType = true; } else if (Feature == "+avx512pf") { HasAVX512PF = true; } else if (Feature == "+avx512dq") { @@ -371,6 +372,8 @@ .Default(NoXOP); XOPLevel = std::max(XOPLevel, XLevel); } + // Turn on _float16 for x86 (feature sse2) + HasFloat16 = SSELevel >= SSE2; // LLVM doesn't have a separate switch for fpmath, so only accept it if it // matches the selected sse level. Index: clang/include/clang/Basic/TargetInfo.h =================================================================== --- clang/include/clang/Basic/TargetInfo.h +++ clang/include/clang/Basic/TargetInfo.h @@ -909,6 +909,8 @@ return true; } + virtual bool shouldEmitFloat16WithExcessPrecision() const { return false; } + /// Specify if mangling based on address space map should be used or /// not for language specific address spaces bool useAddressSpaceMapMangling() const { Index: clang/docs/ReleaseNotes.rst =================================================================== --- clang/docs/ReleaseNotes.rst +++ clang/docs/ReleaseNotes.rst @@ -527,6 +527,8 @@ handled incorrectly by some software (e.g. new failures with incorrect assertions). +- Support for ``_Float16`` type has been added. + Arm and AArch64 Support in Clang -------------------------------- Index: clang/docs/LanguageExtensions.rst =================================================================== --- clang/docs/LanguageExtensions.rst +++ clang/docs/LanguageExtensions.rst @@ -749,7 +749,11 @@ includes all 64-bit and all recent 32-bit processors. When the target supports AVX512-FP16, ``_Float16`` arithmetic is performed using that native support. Otherwise, ``_Float16`` arithmetic is performed by promoting to ``float``, -performing the operation, and then truncating to ``_Float16``. +performing the operation, and then truncating to ``_Float16``. 
When doing this +emulation, Clang defaults to following the C standard's rules for excess +precision arithmetic, which avoids intermediate truncations within statements +and may generate different results from a strict operation-by-operation +emulation. ``_Float16`` will be supported on more targets as they define ABIs for it.
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits