zahiraam updated this revision to Diff 451261.
zahiraam marked 7 inline comments as done.

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D113107/new/

https://reviews.llvm.org/D113107

Files:
  clang/docs/LanguageExtensions.rst
  clang/include/clang/Basic/TargetInfo.h
  clang/lib/Basic/Targets/X86.cpp
  clang/lib/Basic/Targets/X86.h
  clang/lib/CodeGen/CGExprComplex.cpp
  clang/lib/CodeGen/CGExprScalar.cpp
  clang/lib/CodeGen/CodeGenFunction.h
  clang/test/CodeGen/X86/Float16-arithmetic.c
  clang/test/CodeGen/X86/Float16-complex.c
  clang/test/Sema/Float16.c
  clang/test/SemaCXX/Float16.cpp
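For context on the regenerated assertions below: the AVX prefix now checks the native-half path (with -target-feature +avx512fp16), while the X86 prefix checks the promoted path used when the target lacks native _Float16 arithmetic — each half operand is extended to float, the operation is done in float, and the result is truncated back to half. A rough source-level sketch of what the X86 check lines for "a + b" encode (the function name add_promoted and the explicit casts are illustrative only, not part of the patch, and the real test functions additionally return a _Complex value with a zero imaginary part):

// Illustrative sketch only, not from the patch: the promoted "X86"
// codegen path for _Float16 addition without native half arithmetic.
_Float16 add_promoted(_Float16 a, _Float16 b) {
  float ea = (float)a;   // fpext half to float   ([[EXT]])
  float eb = (float)b;   // fpext half to float   ([[EXT1]])
  float s  = ea + eb;    // fadd float            ([[ADD]])
  return (_Float16)s;    // fptrunc float to half ([[UNPROMOTION]])
}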
Index: clang/test/SemaCXX/Float16.cpp
===================================================================
--- clang/test/SemaCXX/Float16.cpp
+++ clang/test/SemaCXX/Float16.cpp
@@ -1,20 +1,10 @@
-// RUN: %clang_cc1 -fsyntax-only -verify -triple i686-linux-pc %s
-// RUN: %clang_cc1 -fsyntax-only -verify -triple i686-linux-pc -target-feature +sse2 %s -DHAVE
-// RUN: %clang_cc1 -fsyntax-only -verify -triple x86_64-linux-pc %s -DHAVE
-// RUN: %clang_cc1 -fsyntax-only -verify -triple spir-unknown-unknown %s -DHAVE
-// RUN: %clang_cc1 -fsyntax-only -verify -triple armv7a-linux-gnu %s -DHAVE
-// RUN: %clang_cc1 -fsyntax-only -verify -triple aarch64-linux-gnu %s -DHAVE
+// RUN: %clang_cc1 -fsyntax-only -verify -triple x86_64-linux-pc %s
+// RUN: %clang_cc1 -fsyntax-only -verify -triple spir-unknown-unknown %s
+// RUN: %clang_cc1 -fsyntax-only -verify -triple armv7a-linux-gnu %s
+// RUN: %clang_cc1 -fsyntax-only -verify -triple aarch64-linux-gnu %s
 
-#ifdef HAVE
 // expected-no-diagnostics
-#endif // HAVE
 
-#ifndef HAVE
-// expected-error@+2{{_Float16 is not supported on this target}}
-#endif // !HAVE
 _Float16 f;
 
-#ifndef HAVE
-// expected-error@+2{{invalid suffix 'F16' on floating constant}}
-#endif // !HAVE
 const auto g = 1.1F16;
Index: clang/test/Sema/Float16.c
===================================================================
--- clang/test/Sema/Float16.c
+++ clang/test/Sema/Float16.c
@@ -1,19 +1,15 @@
-// RUN: %clang_cc1 -fsyntax-only -verify -triple i686-linux-pc %s
-// RUN: %clang_cc1 -fsyntax-only -verify -triple i686-linux-pc -target-feature +sse2 %s -DHAVE
-// RUN: %clang_cc1 -fsyntax-only -verify -triple x86_64-linux-pc %s -DHAVE
-// RUN: %clang_cc1 -fsyntax-only -verify -triple spir-unknown-unknown %s -DHAVE
-// RUN: %clang_cc1 -fsyntax-only -verify -triple armv7a-linux-gnu %s -DHAVE
-// RUN: %clang_cc1 -fsyntax-only -verify -triple aarch64-linux-gnu %s -DHAVE
+// RUN: %clang_cc1 -fsyntax-only -verify -triple x86_64-linux-pc %s
+// RUN: %clang_cc1 -fsyntax-only -verify -triple x86_64-linux-pc -target-feature +avx512fp16 %s
+// RUN: %clang_cc1 -fsyntax-only -verify -triple spir-unknown-unknown %s
+// RUN: %clang_cc1 -fsyntax-only -verify -triple armv7a-linux-gnu %s
+// RUN: %clang_cc1 -fsyntax-only -verify -triple aarch64-linux-gnu %s
+// RUN: %clang_cc1 -fsyntax-only -verify -triple x86_64-linux-pc -target-feature +avx512fp16 %s
+// RUN: %clang_cc1 -fsyntax-only -verify -triple spir-unknown-unknown %s
+// RUN: %clang_cc1 -fsyntax-only -verify -triple armv7a-linux-gnu %s
+// RUN: %clang_cc1 -fsyntax-only -verify -triple aarch64-linux-gnu %s
 
-#ifndef HAVE
-// expected-error@+2{{_Float16 is not supported on this target}}
-#endif // HAVE
-_Float16 f;
-
-#ifdef HAVE
 _Complex _Float16 a;
 void builtin_complex(void) {
   _Float16 a = 0;
   (void)__builtin_complex(a, a); // expected-error {{'_Complex _Float16' is invalid}}
 }
-#endif
Index: clang/test/CodeGen/X86/Float16-complex.c
===================================================================
--- clang/test/CodeGen/X86/Float16-complex.c
+++ clang/test/CodeGen/X86/Float16-complex.c
@@ -1,134 +1,995 @@
-// RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-unknown-unknown -target-feature +avx512fp16 -o - | FileCheck %s --check-prefix=X86
-// RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-unknown-unknown -o - | FileCheck %s --check-prefix=X86
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-unknown-unknown -target-feature +avx512fp16 -o - | FileCheck %s
--check-prefixes=AVX +// RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-unknown-unknown -o - | FileCheck %s --check-prefixes=X86 +// AVX-LABEL: @add_half_rr( +// AVX-NEXT: entry: +// AVX-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2 +// AVX-NEXT: [[A_ADDR:%.*]] = alloca half, align 2 +// AVX-NEXT: [[B_ADDR:%.*]] = alloca half, align 2 +// AVX-NEXT: store half [[A:%.*]], ptr [[A_ADDR]], align 2 +// AVX-NEXT: store half [[B:%.*]], ptr [[B_ADDR]], align 2 +// AVX-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2 +// AVX-NEXT: [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2 +// AVX-NEXT: [[ADD:%.*]] = fadd half [[TMP0]], [[TMP1]] +// AVX-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0 +// AVX-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1 +// AVX-NEXT: store half [[ADD]], ptr [[RETVAL_REALP]], align 2 +// AVX-NEXT: store half 0xH0000, ptr [[RETVAL_IMAGP]], align 2 +// AVX-NEXT: [[TMP2:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2 +// AVX-NEXT: ret <2 x half> [[TMP2]] +// +// X86-LABEL: @add_half_rr( +// X86-NEXT: entry: +// X86-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2 +// X86-NEXT: [[A_ADDR:%.*]] = alloca half, align 2 +// X86-NEXT: [[B_ADDR:%.*]] = alloca half, align 2 +// X86-NEXT: store half [[A:%.*]], ptr [[A_ADDR]], align 2 +// X86-NEXT: store half [[B:%.*]], ptr [[B_ADDR]], align 2 +// X86-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2 +// X86-NEXT: [[EXT:%.*]] = fpext half [[TMP0]] to float +// X86-NEXT: [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2 +// X86-NEXT: [[EXT1:%.*]] = fpext half [[TMP1]] to float +// X86-NEXT: [[ADD:%.*]] = fadd float [[EXT]], [[EXT1]] +// X86-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[ADD]] to half +// X86-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0 +// X86-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1 +// X86-NEXT: store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2 +// X86-NEXT: store half 0xH0000, ptr [[RETVAL_IMAGP]], align 2 +// X86-NEXT: [[TMP2:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2 +// X86-NEXT: ret <2 x half> [[TMP2]] +// _Float16 _Complex add_half_rr(_Float16 a, _Float16 b) { - // X86-LABEL: @add_half_rr( - // X86: fadd - // X86-NOT: fadd - // X86: ret + + return a + b; } + +// AVX-LABEL: @add_half_cr( +// AVX-NEXT: entry: +// AVX-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2 +// AVX-NEXT: [[A:%.*]] = alloca { half, half }, align 2 +// AVX-NEXT: [[B_ADDR:%.*]] = alloca half, align 2 +// AVX-NEXT: store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2 +// AVX-NEXT: store half [[B:%.*]], ptr [[B_ADDR]], align 2 +// AVX-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0 +// AVX-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2 +// AVX-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1 +// AVX-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2 +// AVX-NEXT: [[TMP0:%.*]] = load half, ptr [[B_ADDR]], align 2 +// AVX-NEXT: [[ADD_R:%.*]] = fadd half [[A_REAL]], [[TMP0]] +// AVX-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0 +// AVX-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1 +// AVX-NEXT: store half [[ADD_R]], ptr [[RETVAL_REALP]], align 2 +// AVX-NEXT: store half [[A_IMAG]], ptr [[RETVAL_IMAGP]], 
align 2 +// AVX-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2 +// AVX-NEXT: ret <2 x half> [[TMP1]] +// +// X86-LABEL: @add_half_cr( +// X86-NEXT: entry: +// X86-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2 +// X86-NEXT: [[A:%.*]] = alloca { half, half }, align 2 +// X86-NEXT: [[B_ADDR:%.*]] = alloca half, align 2 +// X86-NEXT: store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2 +// X86-NEXT: store half [[B:%.*]], ptr [[B_ADDR]], align 2 +// X86-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0 +// X86-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2 +// X86-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1 +// X86-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2 +// X86-NEXT: [[EXT:%.*]] = fpext half [[A_REAL]] to float +// X86-NEXT: [[EXT1:%.*]] = fpext half [[A_IMAG]] to float +// X86-NEXT: [[TMP0:%.*]] = load half, ptr [[B_ADDR]], align 2 +// X86-NEXT: [[EXT2:%.*]] = fpext half [[TMP0]] to float +// X86-NEXT: [[ADD_R:%.*]] = fadd float [[EXT]], [[EXT2]] +// X86-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[ADD_R]] to half +// X86-NEXT: [[UNPROMOTION3:%.*]] = fptrunc float [[EXT1]] to half +// X86-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0 +// X86-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1 +// X86-NEXT: store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2 +// X86-NEXT: store half [[UNPROMOTION3]], ptr [[RETVAL_IMAGP]], align 2 +// X86-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2 +// X86-NEXT: ret <2 x half> [[TMP1]] +// _Float16 _Complex add_half_cr(_Float16 _Complex a, _Float16 b) { - // X86-LABEL: @add_half_cr( - // X86: fadd - // X86-NOT: fadd - // X86: ret + + return a + b; } + +// AVX-LABEL: @add_half_rc( +// AVX-NEXT: entry: +// AVX-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2 +// AVX-NEXT: [[B:%.*]] = alloca { half, half }, align 2 +// AVX-NEXT: [[A_ADDR:%.*]] = alloca half, align 2 +// AVX-NEXT: store <2 x half> [[B_COERCE:%.*]], ptr [[B]], align 2 +// AVX-NEXT: store half [[A:%.*]], ptr [[A_ADDR]], align 2 +// AVX-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2 +// AVX-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 0 +// AVX-NEXT: [[B_REAL:%.*]] = load half, ptr [[B_REALP]], align 2 +// AVX-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 1 +// AVX-NEXT: [[B_IMAG:%.*]] = load half, ptr [[B_IMAGP]], align 2 +// AVX-NEXT: [[ADD_R:%.*]] = fadd half [[TMP0]], [[B_REAL]] +// AVX-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0 +// AVX-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1 +// AVX-NEXT: store half [[ADD_R]], ptr [[RETVAL_REALP]], align 2 +// AVX-NEXT: store half [[B_IMAG]], ptr [[RETVAL_IMAGP]], align 2 +// AVX-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2 +// AVX-NEXT: ret <2 x half> [[TMP1]] +// +// X86-LABEL: @add_half_rc( +// X86-NEXT: entry: +// X86-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2 +// X86-NEXT: [[B:%.*]] = alloca { half, half }, align 2 +// X86-NEXT: [[A_ADDR:%.*]] = alloca half, align 2 +// X86-NEXT: store <2 x half> [[B_COERCE:%.*]], ptr [[B]], align 2 +// X86-NEXT: store half [[A:%.*]], ptr [[A_ADDR]], align 2 +// X86-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2 +// X86-NEXT: [[EXT:%.*]] = 
fpext half [[TMP0]] to float +// X86-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 0 +// X86-NEXT: [[B_REAL:%.*]] = load half, ptr [[B_REALP]], align 2 +// X86-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 1 +// X86-NEXT: [[B_IMAG:%.*]] = load half, ptr [[B_IMAGP]], align 2 +// X86-NEXT: [[EXT1:%.*]] = fpext half [[B_REAL]] to float +// X86-NEXT: [[EXT2:%.*]] = fpext half [[B_IMAG]] to float +// X86-NEXT: [[ADD_R:%.*]] = fadd float [[EXT]], [[EXT1]] +// X86-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[ADD_R]] to half +// X86-NEXT: [[UNPROMOTION3:%.*]] = fptrunc float [[EXT2]] to half +// X86-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0 +// X86-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1 +// X86-NEXT: store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2 +// X86-NEXT: store half [[UNPROMOTION3]], ptr [[RETVAL_IMAGP]], align 2 +// X86-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2 +// X86-NEXT: ret <2 x half> [[TMP1]] +// _Float16 _Complex add_half_rc(_Float16 a, _Float16 _Complex b) { - // X86-LABEL: @add_half_rc( - // X86: fadd - // X86-NOT: fadd - // X86: ret + + return a + b; } + +// AVX-LABEL: @add_half_cc( +// AVX-NEXT: entry: +// AVX-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2 +// AVX-NEXT: [[A:%.*]] = alloca { half, half }, align 2 +// AVX-NEXT: [[B:%.*]] = alloca { half, half }, align 2 +// AVX-NEXT: store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2 +// AVX-NEXT: store <2 x half> [[B_COERCE:%.*]], ptr [[B]], align 2 +// AVX-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0 +// AVX-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2 +// AVX-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1 +// AVX-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2 +// AVX-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 0 +// AVX-NEXT: [[B_REAL:%.*]] = load half, ptr [[B_REALP]], align 2 +// AVX-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 1 +// AVX-NEXT: [[B_IMAG:%.*]] = load half, ptr [[B_IMAGP]], align 2 +// AVX-NEXT: [[ADD_R:%.*]] = fadd half [[A_REAL]], [[B_REAL]] +// AVX-NEXT: [[ADD_I:%.*]] = fadd half [[A_IMAG]], [[B_IMAG]] +// AVX-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0 +// AVX-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1 +// AVX-NEXT: store half [[ADD_R]], ptr [[RETVAL_REALP]], align 2 +// AVX-NEXT: store half [[ADD_I]], ptr [[RETVAL_IMAGP]], align 2 +// AVX-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2 +// AVX-NEXT: ret <2 x half> [[TMP0]] +// +// X86-LABEL: @add_half_cc( +// X86-NEXT: entry: +// X86-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2 +// X86-NEXT: [[A:%.*]] = alloca { half, half }, align 2 +// X86-NEXT: [[B:%.*]] = alloca { half, half }, align 2 +// X86-NEXT: store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2 +// X86-NEXT: store <2 x half> [[B_COERCE:%.*]], ptr [[B]], align 2 +// X86-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0 +// X86-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2 +// X86-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1 +// X86-NEXT: [[A_IMAG:%.*]] = load half, ptr 
[[A_IMAGP]], align 2 +// X86-NEXT: [[EXT:%.*]] = fpext half [[A_REAL]] to float +// X86-NEXT: [[EXT1:%.*]] = fpext half [[A_IMAG]] to float +// X86-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 0 +// X86-NEXT: [[B_REAL:%.*]] = load half, ptr [[B_REALP]], align 2 +// X86-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 1 +// X86-NEXT: [[B_IMAG:%.*]] = load half, ptr [[B_IMAGP]], align 2 +// X86-NEXT: [[EXT2:%.*]] = fpext half [[B_REAL]] to float +// X86-NEXT: [[EXT3:%.*]] = fpext half [[B_IMAG]] to float +// X86-NEXT: [[ADD_R:%.*]] = fadd float [[EXT]], [[EXT2]] +// X86-NEXT: [[ADD_I:%.*]] = fadd float [[EXT1]], [[EXT3]] +// X86-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[ADD_R]] to half +// X86-NEXT: [[UNPROMOTION4:%.*]] = fptrunc float [[ADD_I]] to half +// X86-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0 +// X86-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1 +// X86-NEXT: store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2 +// X86-NEXT: store half [[UNPROMOTION4]], ptr [[RETVAL_IMAGP]], align 2 +// X86-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2 +// X86-NEXT: ret <2 x half> [[TMP0]] +// _Float16 _Complex add_half_cc(_Float16 _Complex a, _Float16 _Complex b) { - // X86-LABEL: @add_half_cc( - // X86: fadd - // X86: fadd - // X86-NOT: fadd - // X86: ret + + return a + b; } +// AVX-LABEL: @sub_half_rr( +// AVX-NEXT: entry: +// AVX-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2 +// AVX-NEXT: [[A_ADDR:%.*]] = alloca half, align 2 +// AVX-NEXT: [[B_ADDR:%.*]] = alloca half, align 2 +// AVX-NEXT: store half [[A:%.*]], ptr [[A_ADDR]], align 2 +// AVX-NEXT: store half [[B:%.*]], ptr [[B_ADDR]], align 2 +// AVX-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2 +// AVX-NEXT: [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2 +// AVX-NEXT: [[SUB:%.*]] = fsub half [[TMP0]], [[TMP1]] +// AVX-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0 +// AVX-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1 +// AVX-NEXT: store half [[SUB]], ptr [[RETVAL_REALP]], align 2 +// AVX-NEXT: store half 0xH0000, ptr [[RETVAL_IMAGP]], align 2 +// AVX-NEXT: [[TMP2:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2 +// AVX-NEXT: ret <2 x half> [[TMP2]] +// +// X86-LABEL: @sub_half_rr( +// X86-NEXT: entry: +// X86-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2 +// X86-NEXT: [[A_ADDR:%.*]] = alloca half, align 2 +// X86-NEXT: [[B_ADDR:%.*]] = alloca half, align 2 +// X86-NEXT: store half [[A:%.*]], ptr [[A_ADDR]], align 2 +// X86-NEXT: store half [[B:%.*]], ptr [[B_ADDR]], align 2 +// X86-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2 +// X86-NEXT: [[EXT:%.*]] = fpext half [[TMP0]] to float +// X86-NEXT: [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2 +// X86-NEXT: [[EXT1:%.*]] = fpext half [[TMP1]] to float +// X86-NEXT: [[SUB:%.*]] = fsub float [[EXT]], [[EXT1]] +// X86-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[SUB]] to half +// X86-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0 +// X86-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1 +// X86-NEXT: store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2 +// X86-NEXT: store half 0xH0000, ptr [[RETVAL_IMAGP]], align 2 +// X86-NEXT: [[TMP2:%.*]] = load <2 x 
half>, ptr [[RETVAL]], align 2 +// X86-NEXT: ret <2 x half> [[TMP2]] +// _Float16 _Complex sub_half_rr(_Float16 a, _Float16 b) { - // X86-LABEL: @sub_half_rr( - // X86: fsub - // X86-NOT: fsub - // X86: ret + + return a - b; } + +// AVX-LABEL: @sub_half_cr( +// AVX-NEXT: entry: +// AVX-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2 +// AVX-NEXT: [[A:%.*]] = alloca { half, half }, align 2 +// AVX-NEXT: [[B_ADDR:%.*]] = alloca half, align 2 +// AVX-NEXT: store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2 +// AVX-NEXT: store half [[B:%.*]], ptr [[B_ADDR]], align 2 +// AVX-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0 +// AVX-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2 +// AVX-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1 +// AVX-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2 +// AVX-NEXT: [[TMP0:%.*]] = load half, ptr [[B_ADDR]], align 2 +// AVX-NEXT: [[SUB_R:%.*]] = fsub half [[A_REAL]], [[TMP0]] +// AVX-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0 +// AVX-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1 +// AVX-NEXT: store half [[SUB_R]], ptr [[RETVAL_REALP]], align 2 +// AVX-NEXT: store half [[A_IMAG]], ptr [[RETVAL_IMAGP]], align 2 +// AVX-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2 +// AVX-NEXT: ret <2 x half> [[TMP1]] +// +// X86-LABEL: @sub_half_cr( +// X86-NEXT: entry: +// X86-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2 +// X86-NEXT: [[A:%.*]] = alloca { half, half }, align 2 +// X86-NEXT: [[B_ADDR:%.*]] = alloca half, align 2 +// X86-NEXT: store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2 +// X86-NEXT: store half [[B:%.*]], ptr [[B_ADDR]], align 2 +// X86-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0 +// X86-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2 +// X86-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1 +// X86-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2 +// X86-NEXT: [[EXT:%.*]] = fpext half [[A_REAL]] to float +// X86-NEXT: [[EXT1:%.*]] = fpext half [[A_IMAG]] to float +// X86-NEXT: [[TMP0:%.*]] = load half, ptr [[B_ADDR]], align 2 +// X86-NEXT: [[EXT2:%.*]] = fpext half [[TMP0]] to float +// X86-NEXT: [[SUB_R:%.*]] = fsub float [[EXT]], [[EXT2]] +// X86-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[SUB_R]] to half +// X86-NEXT: [[UNPROMOTION3:%.*]] = fptrunc float [[EXT1]] to half +// X86-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0 +// X86-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1 +// X86-NEXT: store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2 +// X86-NEXT: store half [[UNPROMOTION3]], ptr [[RETVAL_IMAGP]], align 2 +// X86-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2 +// X86-NEXT: ret <2 x half> [[TMP1]] +// _Float16 _Complex sub_half_cr(_Float16 _Complex a, _Float16 b) { - // X86-LABEL: @sub_half_cr( - // X86: fsub - // X86-NOT: fsub - // X86: ret + + return a - b; } + +// AVX-LABEL: @sub_half_rc( +// AVX-NEXT: entry: +// AVX-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2 +// AVX-NEXT: [[B:%.*]] = alloca { half, half }, align 2 +// AVX-NEXT: [[A_ADDR:%.*]] = alloca half, align 2 +// AVX-NEXT: store <2 x half> [[B_COERCE:%.*]], ptr [[B]], align 2 +// AVX-NEXT: store half [[A:%.*]], ptr 
[[A_ADDR]], align 2 +// AVX-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2 +// AVX-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 0 +// AVX-NEXT: [[B_REAL:%.*]] = load half, ptr [[B_REALP]], align 2 +// AVX-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 1 +// AVX-NEXT: [[B_IMAG:%.*]] = load half, ptr [[B_IMAGP]], align 2 +// AVX-NEXT: [[SUB_R:%.*]] = fsub half [[TMP0]], [[B_REAL]] +// AVX-NEXT: [[SUB_I:%.*]] = fneg half [[B_IMAG]] +// AVX-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0 +// AVX-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1 +// AVX-NEXT: store half [[SUB_R]], ptr [[RETVAL_REALP]], align 2 +// AVX-NEXT: store half [[SUB_I]], ptr [[RETVAL_IMAGP]], align 2 +// AVX-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2 +// AVX-NEXT: ret <2 x half> [[TMP1]] +// +// X86-LABEL: @sub_half_rc( +// X86-NEXT: entry: +// X86-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2 +// X86-NEXT: [[B:%.*]] = alloca { half, half }, align 2 +// X86-NEXT: [[A_ADDR:%.*]] = alloca half, align 2 +// X86-NEXT: store <2 x half> [[B_COERCE:%.*]], ptr [[B]], align 2 +// X86-NEXT: store half [[A:%.*]], ptr [[A_ADDR]], align 2 +// X86-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2 +// X86-NEXT: [[EXT:%.*]] = fpext half [[TMP0]] to float +// X86-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 0 +// X86-NEXT: [[B_REAL:%.*]] = load half, ptr [[B_REALP]], align 2 +// X86-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 1 +// X86-NEXT: [[B_IMAG:%.*]] = load half, ptr [[B_IMAGP]], align 2 +// X86-NEXT: [[EXT1:%.*]] = fpext half [[B_REAL]] to float +// X86-NEXT: [[EXT2:%.*]] = fpext half [[B_IMAG]] to float +// X86-NEXT: [[SUB_R:%.*]] = fsub float [[EXT]], [[EXT1]] +// X86-NEXT: [[SUB_I:%.*]] = fneg float [[EXT2]] +// X86-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[SUB_R]] to half +// X86-NEXT: [[UNPROMOTION3:%.*]] = fptrunc float [[SUB_I]] to half +// X86-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0 +// X86-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1 +// X86-NEXT: store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2 +// X86-NEXT: store half [[UNPROMOTION3]], ptr [[RETVAL_IMAGP]], align 2 +// X86-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2 +// X86-NEXT: ret <2 x half> [[TMP1]] +// _Float16 _Complex sub_half_rc(_Float16 a, _Float16 _Complex b) { - // X86-LABEL: @sub_half_rc( - // X86: fsub - // X86: fneg - // X86-NOT: fsub - // X86: ret + + return a - b; } + +// AVX-LABEL: @sub_half_cc( +// AVX-NEXT: entry: +// AVX-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2 +// AVX-NEXT: [[A:%.*]] = alloca { half, half }, align 2 +// AVX-NEXT: [[B:%.*]] = alloca { half, half }, align 2 +// AVX-NEXT: store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2 +// AVX-NEXT: store <2 x half> [[B_COERCE:%.*]], ptr [[B]], align 2 +// AVX-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0 +// AVX-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2 +// AVX-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1 +// AVX-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2 +// AVX-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 
0, i32 0 +// AVX-NEXT: [[B_REAL:%.*]] = load half, ptr [[B_REALP]], align 2 +// AVX-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 1 +// AVX-NEXT: [[B_IMAG:%.*]] = load half, ptr [[B_IMAGP]], align 2 +// AVX-NEXT: [[SUB_R:%.*]] = fsub half [[A_REAL]], [[B_REAL]] +// AVX-NEXT: [[SUB_I:%.*]] = fsub half [[A_IMAG]], [[B_IMAG]] +// AVX-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0 +// AVX-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1 +// AVX-NEXT: store half [[SUB_R]], ptr [[RETVAL_REALP]], align 2 +// AVX-NEXT: store half [[SUB_I]], ptr [[RETVAL_IMAGP]], align 2 +// AVX-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2 +// AVX-NEXT: ret <2 x half> [[TMP0]] +// +// X86-LABEL: @sub_half_cc( +// X86-NEXT: entry: +// X86-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2 +// X86-NEXT: [[A:%.*]] = alloca { half, half }, align 2 +// X86-NEXT: [[B:%.*]] = alloca { half, half }, align 2 +// X86-NEXT: store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2 +// X86-NEXT: store <2 x half> [[B_COERCE:%.*]], ptr [[B]], align 2 +// X86-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0 +// X86-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2 +// X86-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1 +// X86-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2 +// X86-NEXT: [[EXT:%.*]] = fpext half [[A_REAL]] to float +// X86-NEXT: [[EXT1:%.*]] = fpext half [[A_IMAG]] to float +// X86-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 0 +// X86-NEXT: [[B_REAL:%.*]] = load half, ptr [[B_REALP]], align 2 +// X86-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 1 +// X86-NEXT: [[B_IMAG:%.*]] = load half, ptr [[B_IMAGP]], align 2 +// X86-NEXT: [[EXT2:%.*]] = fpext half [[B_REAL]] to float +// X86-NEXT: [[EXT3:%.*]] = fpext half [[B_IMAG]] to float +// X86-NEXT: [[SUB_R:%.*]] = fsub float [[EXT]], [[EXT2]] +// X86-NEXT: [[SUB_I:%.*]] = fsub float [[EXT1]], [[EXT3]] +// X86-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[SUB_R]] to half +// X86-NEXT: [[UNPROMOTION4:%.*]] = fptrunc float [[SUB_I]] to half +// X86-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0 +// X86-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1 +// X86-NEXT: store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2 +// X86-NEXT: store half [[UNPROMOTION4]], ptr [[RETVAL_IMAGP]], align 2 +// X86-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2 +// X86-NEXT: ret <2 x half> [[TMP0]] +// _Float16 _Complex sub_half_cc(_Float16 _Complex a, _Float16 _Complex b) { - // X86-LABEL: @sub_half_cc( - // X86: fsub - // X86: fsub - // X86-NOT: fsub - // X86: ret + + return a - b; } +// AVX-LABEL: @mul_half_rr( +// AVX-NEXT: entry: +// AVX-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2 +// AVX-NEXT: [[A_ADDR:%.*]] = alloca half, align 2 +// AVX-NEXT: [[B_ADDR:%.*]] = alloca half, align 2 +// AVX-NEXT: store half [[A:%.*]], ptr [[A_ADDR]], align 2 +// AVX-NEXT: store half [[B:%.*]], ptr [[B_ADDR]], align 2 +// AVX-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2 +// AVX-NEXT: [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2 +// AVX-NEXT: [[MUL:%.*]] = fmul half [[TMP0]], [[TMP1]] +// AVX-NEXT: [[RETVAL_REALP:%.*]] = getelementptr 
inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0 +// AVX-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1 +// AVX-NEXT: store half [[MUL]], ptr [[RETVAL_REALP]], align 2 +// AVX-NEXT: store half 0xH0000, ptr [[RETVAL_IMAGP]], align 2 +// AVX-NEXT: [[TMP2:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2 +// AVX-NEXT: ret <2 x half> [[TMP2]] +// +// X86-LABEL: @mul_half_rr( +// X86-NEXT: entry: +// X86-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2 +// X86-NEXT: [[A_ADDR:%.*]] = alloca half, align 2 +// X86-NEXT: [[B_ADDR:%.*]] = alloca half, align 2 +// X86-NEXT: store half [[A:%.*]], ptr [[A_ADDR]], align 2 +// X86-NEXT: store half [[B:%.*]], ptr [[B_ADDR]], align 2 +// X86-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2 +// X86-NEXT: [[EXT:%.*]] = fpext half [[TMP0]] to float +// X86-NEXT: [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2 +// X86-NEXT: [[EXT1:%.*]] = fpext half [[TMP1]] to float +// X86-NEXT: [[MUL:%.*]] = fmul float [[EXT]], [[EXT1]] +// X86-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[MUL]] to half +// X86-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0 +// X86-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1 +// X86-NEXT: store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2 +// X86-NEXT: store half 0xH0000, ptr [[RETVAL_IMAGP]], align 2 +// X86-NEXT: [[TMP2:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2 +// X86-NEXT: ret <2 x half> [[TMP2]] +// _Float16 _Complex mul_half_rr(_Float16 a, _Float16 b) { - // X86-LABEL: @mul_half_rr( - // X86: fmul - // X86-NOT: fmul - // X86: ret + + return a * b; } + +// AVX-LABEL: @mul_half_cr( +// AVX-NEXT: entry: +// AVX-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2 +// AVX-NEXT: [[A:%.*]] = alloca { half, half }, align 2 +// AVX-NEXT: [[B_ADDR:%.*]] = alloca half, align 2 +// AVX-NEXT: store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2 +// AVX-NEXT: store half [[B:%.*]], ptr [[B_ADDR]], align 2 +// AVX-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0 +// AVX-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2 +// AVX-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1 +// AVX-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2 +// AVX-NEXT: [[TMP0:%.*]] = load half, ptr [[B_ADDR]], align 2 +// AVX-NEXT: [[MUL_RL:%.*]] = fmul half [[A_REAL]], [[TMP0]] +// AVX-NEXT: [[MUL_IL:%.*]] = fmul half [[A_IMAG]], [[TMP0]] +// AVX-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0 +// AVX-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1 +// AVX-NEXT: store half [[MUL_RL]], ptr [[RETVAL_REALP]], align 2 +// AVX-NEXT: store half [[MUL_IL]], ptr [[RETVAL_IMAGP]], align 2 +// AVX-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2 +// AVX-NEXT: ret <2 x half> [[TMP1]] +// +// X86-LABEL: @mul_half_cr( +// X86-NEXT: entry: +// X86-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2 +// X86-NEXT: [[A:%.*]] = alloca { half, half }, align 2 +// X86-NEXT: [[B_ADDR:%.*]] = alloca half, align 2 +// X86-NEXT: store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2 +// X86-NEXT: store half [[B:%.*]], ptr [[B_ADDR]], align 2 +// X86-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0 +// X86-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2 +// 
X86-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1 +// X86-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2 +// X86-NEXT: [[EXT:%.*]] = fpext half [[A_REAL]] to float +// X86-NEXT: [[EXT1:%.*]] = fpext half [[A_IMAG]] to float +// X86-NEXT: [[TMP0:%.*]] = load half, ptr [[B_ADDR]], align 2 +// X86-NEXT: [[EXT2:%.*]] = fpext half [[TMP0]] to float +// X86-NEXT: [[MUL_RL:%.*]] = fmul float [[EXT]], [[EXT2]] +// X86-NEXT: [[MUL_IL:%.*]] = fmul float [[EXT1]], [[EXT2]] +// X86-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[MUL_RL]] to half +// X86-NEXT: [[UNPROMOTION3:%.*]] = fptrunc float [[MUL_IL]] to half +// X86-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0 +// X86-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1 +// X86-NEXT: store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2 +// X86-NEXT: store half [[UNPROMOTION3]], ptr [[RETVAL_IMAGP]], align 2 +// X86-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2 +// X86-NEXT: ret <2 x half> [[TMP1]] +// _Float16 _Complex mul_half_cr(_Float16 _Complex a, _Float16 b) { - // X86-LABEL: @mul_half_cr( - // X86: fmul - // X86: fmul - // X86-NOT: fmul - // X86: ret + + return a * b; } + +// AVX-LABEL: @mul_half_rc( +// AVX-NEXT: entry: +// AVX-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2 +// AVX-NEXT: [[B:%.*]] = alloca { half, half }, align 2 +// AVX-NEXT: [[A_ADDR:%.*]] = alloca half, align 2 +// AVX-NEXT: store <2 x half> [[B_COERCE:%.*]], ptr [[B]], align 2 +// AVX-NEXT: store half [[A:%.*]], ptr [[A_ADDR]], align 2 +// AVX-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2 +// AVX-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 0 +// AVX-NEXT: [[B_REAL:%.*]] = load half, ptr [[B_REALP]], align 2 +// AVX-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 1 +// AVX-NEXT: [[B_IMAG:%.*]] = load half, ptr [[B_IMAGP]], align 2 +// AVX-NEXT: [[MUL_RL:%.*]] = fmul half [[TMP0]], [[B_REAL]] +// AVX-NEXT: [[MUL_IR:%.*]] = fmul half [[TMP0]], [[B_IMAG]] +// AVX-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0 +// AVX-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1 +// AVX-NEXT: store half [[MUL_RL]], ptr [[RETVAL_REALP]], align 2 +// AVX-NEXT: store half [[MUL_IR]], ptr [[RETVAL_IMAGP]], align 2 +// AVX-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2 +// AVX-NEXT: ret <2 x half> [[TMP1]] +// +// X86-LABEL: @mul_half_rc( +// X86-NEXT: entry: +// X86-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2 +// X86-NEXT: [[B:%.*]] = alloca { half, half }, align 2 +// X86-NEXT: [[A_ADDR:%.*]] = alloca half, align 2 +// X86-NEXT: store <2 x half> [[B_COERCE:%.*]], ptr [[B]], align 2 +// X86-NEXT: store half [[A:%.*]], ptr [[A_ADDR]], align 2 +// X86-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2 +// X86-NEXT: [[EXT:%.*]] = fpext half [[TMP0]] to float +// X86-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 0 +// X86-NEXT: [[B_REAL:%.*]] = load half, ptr [[B_REALP]], align 2 +// X86-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 1 +// X86-NEXT: [[B_IMAG:%.*]] = load half, ptr [[B_IMAGP]], align 2 +// X86-NEXT: [[EXT1:%.*]] = fpext half [[B_REAL]] to float +// X86-NEXT: [[EXT2:%.*]] = fpext half [[B_IMAG]] to float +// X86-NEXT: 
[[MUL_RL:%.*]] = fmul float [[EXT]], [[EXT1]] +// X86-NEXT: [[MUL_IR:%.*]] = fmul float [[EXT]], [[EXT2]] +// X86-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[MUL_RL]] to half +// X86-NEXT: [[UNPROMOTION3:%.*]] = fptrunc float [[MUL_IR]] to half +// X86-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0 +// X86-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1 +// X86-NEXT: store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2 +// X86-NEXT: store half [[UNPROMOTION3]], ptr [[RETVAL_IMAGP]], align 2 +// X86-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2 +// X86-NEXT: ret <2 x half> [[TMP1]] +// _Float16 _Complex mul_half_rc(_Float16 a, _Float16 _Complex b) { - // X86-LABEL: @mul_half_rc( - // X86: fmul - // X86: fmul - // X86-NOT: fmul - // X86: ret + + return a * b; } + +// AVX-LABEL: @mul_half_cc( +// AVX-NEXT: entry: +// AVX-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2 +// AVX-NEXT: [[A:%.*]] = alloca { half, half }, align 2 +// AVX-NEXT: [[B:%.*]] = alloca { half, half }, align 2 +// AVX-NEXT: [[COERCE:%.*]] = alloca { half, half }, align 2 +// AVX-NEXT: store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2 +// AVX-NEXT: store <2 x half> [[B_COERCE:%.*]], ptr [[B]], align 2 +// AVX-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0 +// AVX-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2 +// AVX-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1 +// AVX-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2 +// AVX-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 0 +// AVX-NEXT: [[B_REAL:%.*]] = load half, ptr [[B_REALP]], align 2 +// AVX-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 1 +// AVX-NEXT: [[B_IMAG:%.*]] = load half, ptr [[B_IMAGP]], align 2 +// AVX-NEXT: [[MUL_AC:%.*]] = fmul half [[A_REAL]], [[B_REAL]] +// AVX-NEXT: [[MUL_BD:%.*]] = fmul half [[A_IMAG]], [[B_IMAG]] +// AVX-NEXT: [[MUL_AD:%.*]] = fmul half [[A_REAL]], [[B_IMAG]] +// AVX-NEXT: [[MUL_BC:%.*]] = fmul half [[A_IMAG]], [[B_REAL]] +// AVX-NEXT: [[MUL_R:%.*]] = fsub half [[MUL_AC]], [[MUL_BD]] +// AVX-NEXT: [[MUL_I:%.*]] = fadd half [[MUL_AD]], [[MUL_BC]] +// AVX-NEXT: [[ISNAN_CMP:%.*]] = fcmp uno half [[MUL_R]], [[MUL_R]] +// AVX-NEXT: br i1 [[ISNAN_CMP]], label [[COMPLEX_MUL_IMAG_NAN:%.*]], label [[COMPLEX_MUL_CONT:%.*]], !prof [[PROF2:![0-9]+]] +// AVX: complex_mul_imag_nan: +// AVX-NEXT: [[ISNAN_CMP1:%.*]] = fcmp uno half [[MUL_I]], [[MUL_I]] +// AVX-NEXT: br i1 [[ISNAN_CMP1]], label [[COMPLEX_MUL_LIBCALL:%.*]], label [[COMPLEX_MUL_CONT]], !prof [[PROF2]] +// AVX: complex_mul_libcall: +// AVX-NEXT: [[CALL:%.*]] = call <2 x half> @__mulhc3(half noundef [[A_REAL]], half noundef [[A_IMAG]], half noundef [[B_REAL]], half noundef [[B_IMAG]]) #[[ATTR1:[0-9]+]] +// AVX-NEXT: store <2 x half> [[CALL]], ptr [[COERCE]], align 2 +// AVX-NEXT: [[COERCE_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[COERCE]], i32 0, i32 0 +// AVX-NEXT: [[COERCE_REAL:%.*]] = load half, ptr [[COERCE_REALP]], align 2 +// AVX-NEXT: [[COERCE_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[COERCE]], i32 0, i32 1 +// AVX-NEXT: [[COERCE_IMAG:%.*]] = load half, ptr [[COERCE_IMAGP]], align 2 +// AVX-NEXT: br label [[COMPLEX_MUL_CONT]] +// AVX: complex_mul_cont: +// AVX-NEXT: [[REAL_MUL_PHI:%.*]] = phi half [ [[MUL_R]], [[ENTRY:%.*]] ], [ [[MUL_R]], 
[[COMPLEX_MUL_IMAG_NAN]] ], [ [[COERCE_REAL]], [[COMPLEX_MUL_LIBCALL]] ] +// AVX-NEXT: [[IMAG_MUL_PHI:%.*]] = phi half [ [[MUL_I]], [[ENTRY]] ], [ [[MUL_I]], [[COMPLEX_MUL_IMAG_NAN]] ], [ [[COERCE_IMAG]], [[COMPLEX_MUL_LIBCALL]] ] +// AVX-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0 +// AVX-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1 +// AVX-NEXT: store half [[REAL_MUL_PHI]], ptr [[RETVAL_REALP]], align 2 +// AVX-NEXT: store half [[IMAG_MUL_PHI]], ptr [[RETVAL_IMAGP]], align 2 +// AVX-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2 +// AVX-NEXT: ret <2 x half> [[TMP0]] +// +// X86-LABEL: @mul_half_cc( +// X86-NEXT: entry: +// X86-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2 +// X86-NEXT: [[A:%.*]] = alloca { half, half }, align 2 +// X86-NEXT: [[B:%.*]] = alloca { half, half }, align 2 +// X86-NEXT: [[COERCE:%.*]] = alloca { float, float }, align 4 +// X86-NEXT: store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2 +// X86-NEXT: store <2 x half> [[B_COERCE:%.*]], ptr [[B]], align 2 +// X86-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0 +// X86-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2 +// X86-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1 +// X86-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2 +// X86-NEXT: [[EXT:%.*]] = fpext half [[A_REAL]] to float +// X86-NEXT: [[EXT1:%.*]] = fpext half [[A_IMAG]] to float +// X86-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 0 +// X86-NEXT: [[B_REAL:%.*]] = load half, ptr [[B_REALP]], align 2 +// X86-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 1 +// X86-NEXT: [[B_IMAG:%.*]] = load half, ptr [[B_IMAGP]], align 2 +// X86-NEXT: [[EXT2:%.*]] = fpext half [[B_REAL]] to float +// X86-NEXT: [[EXT3:%.*]] = fpext half [[B_IMAG]] to float +// X86-NEXT: [[MUL_AC:%.*]] = fmul float [[EXT]], [[EXT2]] +// X86-NEXT: [[MUL_BD:%.*]] = fmul float [[EXT1]], [[EXT3]] +// X86-NEXT: [[MUL_AD:%.*]] = fmul float [[EXT]], [[EXT3]] +// X86-NEXT: [[MUL_BC:%.*]] = fmul float [[EXT1]], [[EXT2]] +// X86-NEXT: [[MUL_R:%.*]] = fsub float [[MUL_AC]], [[MUL_BD]] +// X86-NEXT: [[MUL_I:%.*]] = fadd float [[MUL_AD]], [[MUL_BC]] +// X86-NEXT: [[ISNAN_CMP:%.*]] = fcmp uno float [[MUL_R]], [[MUL_R]] +// X86-NEXT: br i1 [[ISNAN_CMP]], label [[COMPLEX_MUL_IMAG_NAN:%.*]], label [[COMPLEX_MUL_CONT:%.*]], !prof [[PROF2:![0-9]+]] +// X86: complex_mul_imag_nan: +// X86-NEXT: [[ISNAN_CMP4:%.*]] = fcmp uno float [[MUL_I]], [[MUL_I]] +// X86-NEXT: br i1 [[ISNAN_CMP4]], label [[COMPLEX_MUL_LIBCALL:%.*]], label [[COMPLEX_MUL_CONT]], !prof [[PROF2]] +// X86: complex_mul_libcall: +// X86-NEXT: [[CALL:%.*]] = call <2 x float> @__mulsc3(float noundef [[EXT]], float noundef [[EXT1]], float noundef [[EXT2]], float noundef [[EXT3]]) #[[ATTR2:[0-9]+]] +// X86-NEXT: store <2 x float> [[CALL]], ptr [[COERCE]], align 4 +// X86-NEXT: [[COERCE_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[COERCE]], i32 0, i32 0 +// X86-NEXT: [[COERCE_REAL:%.*]] = load float, ptr [[COERCE_REALP]], align 4 +// X86-NEXT: [[COERCE_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[COERCE]], i32 0, i32 1 +// X86-NEXT: [[COERCE_IMAG:%.*]] = load float, ptr [[COERCE_IMAGP]], align 4 +// X86-NEXT: br label [[COMPLEX_MUL_CONT]] +// X86: complex_mul_cont: +// X86-NEXT: [[REAL_MUL_PHI:%.*]] = phi float [ 
[[MUL_R]], [[ENTRY:%.*]] ], [ [[MUL_R]], [[COMPLEX_MUL_IMAG_NAN]] ], [ [[COERCE_REAL]], [[COMPLEX_MUL_LIBCALL]] ] +// X86-NEXT: [[IMAG_MUL_PHI:%.*]] = phi float [ [[MUL_I]], [[ENTRY]] ], [ [[MUL_I]], [[COMPLEX_MUL_IMAG_NAN]] ], [ [[COERCE_IMAG]], [[COMPLEX_MUL_LIBCALL]] ] +// X86-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[REAL_MUL_PHI]] to half +// X86-NEXT: [[UNPROMOTION5:%.*]] = fptrunc float [[IMAG_MUL_PHI]] to half +// X86-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0 +// X86-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1 +// X86-NEXT: store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2 +// X86-NEXT: store half [[UNPROMOTION5]], ptr [[RETVAL_IMAGP]], align 2 +// X86-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2 +// X86-NEXT: ret <2 x half> [[TMP0]] +// _Float16 _Complex mul_half_cc(_Float16 _Complex a, _Float16 _Complex b) { - // X86-LABEL: @mul_half_cc( - // X86: %[[AC:[^ ]+]] = fmul - // X86: %[[BD:[^ ]+]] = fmul - // X86: %[[AD:[^ ]+]] = fmul - // X86: %[[BC:[^ ]+]] = fmul - // X86: %[[RR:[^ ]+]] = fsub half %[[AC]], %[[BD]] - // X86: %[[RI:[^ ]+]] = fadd half - // X86-DAG: %[[AD]] - // X86-DAG: , - // X86-DAG: %[[BC]] - // X86: fcmp uno half %[[RR]] - // X86: fcmp uno half %[[RI]] - // X86: call {{.*}} @__mulhc3( - // X86: ret + + return a * b; } - +// AVX-LABEL: @div_half_rr( +// AVX-NEXT: entry: +// AVX-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2 +// AVX-NEXT: [[A_ADDR:%.*]] = alloca half, align 2 +// AVX-NEXT: [[B_ADDR:%.*]] = alloca half, align 2 +// AVX-NEXT: store half [[A:%.*]], ptr [[A_ADDR]], align 2 +// AVX-NEXT: store half [[B:%.*]], ptr [[B_ADDR]], align 2 +// AVX-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2 +// AVX-NEXT: [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2 +// AVX-NEXT: [[DIV:%.*]] = fdiv half [[TMP0]], [[TMP1]] +// AVX-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0 +// AVX-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1 +// AVX-NEXT: store half [[DIV]], ptr [[RETVAL_REALP]], align 2 +// AVX-NEXT: store half 0xH0000, ptr [[RETVAL_IMAGP]], align 2 +// AVX-NEXT: [[TMP2:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2 +// AVX-NEXT: ret <2 x half> [[TMP2]] +// +// X86-LABEL: @div_half_rr( +// X86-NEXT: entry: +// X86-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2 +// X86-NEXT: [[A_ADDR:%.*]] = alloca half, align 2 +// X86-NEXT: [[B_ADDR:%.*]] = alloca half, align 2 +// X86-NEXT: store half [[A:%.*]], ptr [[A_ADDR]], align 2 +// X86-NEXT: store half [[B:%.*]], ptr [[B_ADDR]], align 2 +// X86-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2 +// X86-NEXT: [[EXT:%.*]] = fpext half [[TMP0]] to float +// X86-NEXT: [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2 +// X86-NEXT: [[EXT1:%.*]] = fpext half [[TMP1]] to float +// X86-NEXT: [[DIV:%.*]] = fdiv float [[EXT]], [[EXT1]] +// X86-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[DIV]] to half +// X86-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0 +// X86-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1 +// X86-NEXT: store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2 +// X86-NEXT: store half 0xH0000, ptr [[RETVAL_IMAGP]], align 2 +// X86-NEXT: [[TMP2:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2 +// X86-NEXT: ret <2 x half> [[TMP2]] +// _Float16 _Complex 
div_half_rr(_Float16 a, _Float16 b) { - // X86-LABEL: @div_half_rr( - // X86: fdiv - // X86-NOT: fdiv - // X86: ret + + return a / b; } + +// AVX-LABEL: @div_half_cr( +// AVX-NEXT: entry: +// AVX-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2 +// AVX-NEXT: [[A:%.*]] = alloca { half, half }, align 2 +// AVX-NEXT: [[B_ADDR:%.*]] = alloca half, align 2 +// AVX-NEXT: store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2 +// AVX-NEXT: store half [[B:%.*]], ptr [[B_ADDR]], align 2 +// AVX-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0 +// AVX-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2 +// AVX-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1 +// AVX-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2 +// AVX-NEXT: [[TMP0:%.*]] = load half, ptr [[B_ADDR]], align 2 +// AVX-NEXT: [[TMP1:%.*]] = fdiv half [[A_REAL]], [[TMP0]] +// AVX-NEXT: [[TMP2:%.*]] = fdiv half [[A_IMAG]], [[TMP0]] +// AVX-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0 +// AVX-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1 +// AVX-NEXT: store half [[TMP1]], ptr [[RETVAL_REALP]], align 2 +// AVX-NEXT: store half [[TMP2]], ptr [[RETVAL_IMAGP]], align 2 +// AVX-NEXT: [[TMP3:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2 +// AVX-NEXT: ret <2 x half> [[TMP3]] +// +// X86-LABEL: @div_half_cr( +// X86-NEXT: entry: +// X86-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2 +// X86-NEXT: [[A:%.*]] = alloca { half, half }, align 2 +// X86-NEXT: [[B_ADDR:%.*]] = alloca half, align 2 +// X86-NEXT: store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2 +// X86-NEXT: store half [[B:%.*]], ptr [[B_ADDR]], align 2 +// X86-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0 +// X86-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2 +// X86-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1 +// X86-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2 +// X86-NEXT: [[EXT:%.*]] = fpext half [[A_REAL]] to float +// X86-NEXT: [[EXT1:%.*]] = fpext half [[A_IMAG]] to float +// X86-NEXT: [[TMP0:%.*]] = load half, ptr [[B_ADDR]], align 2 +// X86-NEXT: [[EXT2:%.*]] = fpext half [[TMP0]] to float +// X86-NEXT: [[TMP1:%.*]] = fdiv float [[EXT]], [[EXT2]] +// X86-NEXT: [[TMP2:%.*]] = fdiv float [[EXT1]], [[EXT2]] +// X86-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[TMP1]] to half +// X86-NEXT: [[UNPROMOTION3:%.*]] = fptrunc float [[TMP2]] to half +// X86-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0 +// X86-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1 +// X86-NEXT: store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2 +// X86-NEXT: store half [[UNPROMOTION3]], ptr [[RETVAL_IMAGP]], align 2 +// X86-NEXT: [[TMP3:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2 +// X86-NEXT: ret <2 x half> [[TMP3]] +// _Float16 _Complex div_half_cr(_Float16 _Complex a, _Float16 b) { - // X86-LABEL: @div_half_cr( - // X86: fdiv - // X86: fdiv - // X86-NOT: fdiv - // X86: ret + + return a / b; } +// AVX-LABEL: @div_half_rc( +// AVX-NEXT: entry: +// AVX-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2 +// AVX-NEXT: [[B:%.*]] = alloca { half, half }, align 2 +// AVX-NEXT: [[A_ADDR:%.*]] = alloca half, align 2 +// AVX-NEXT: [[COERCE:%.*]] = alloca { half, half }, align 2 +// AVX-NEXT: 
store <2 x half> [[B_COERCE:%.*]], ptr [[B]], align 2 +// AVX-NEXT: store half [[A:%.*]], ptr [[A_ADDR]], align 2 +// AVX-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2 +// AVX-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 0 +// AVX-NEXT: [[B_REAL:%.*]] = load half, ptr [[B_REALP]], align 2 +// AVX-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 1 +// AVX-NEXT: [[B_IMAG:%.*]] = load half, ptr [[B_IMAGP]], align 2 +// AVX-NEXT: [[CALL:%.*]] = call <2 x half> @__divhc3(half noundef [[TMP0]], half noundef 0xH0000, half noundef [[B_REAL]], half noundef [[B_IMAG]]) #[[ATTR1]] +// AVX-NEXT: store <2 x half> [[CALL]], ptr [[COERCE]], align 2 +// AVX-NEXT: [[COERCE_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[COERCE]], i32 0, i32 0 +// AVX-NEXT: [[COERCE_REAL:%.*]] = load half, ptr [[COERCE_REALP]], align 2 +// AVX-NEXT: [[COERCE_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[COERCE]], i32 0, i32 1 +// AVX-NEXT: [[COERCE_IMAG:%.*]] = load half, ptr [[COERCE_IMAGP]], align 2 +// AVX-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0 +// AVX-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1 +// AVX-NEXT: store half [[COERCE_REAL]], ptr [[RETVAL_REALP]], align 2 +// AVX-NEXT: store half [[COERCE_IMAG]], ptr [[RETVAL_IMAGP]], align 2 +// AVX-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2 +// AVX-NEXT: ret <2 x half> [[TMP1]] +// +// X86-LABEL: @div_half_rc( +// X86-NEXT: entry: +// X86-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2 +// X86-NEXT: [[B:%.*]] = alloca { half, half }, align 2 +// X86-NEXT: [[A_ADDR:%.*]] = alloca half, align 2 +// X86-NEXT: [[COERCE:%.*]] = alloca { float, float }, align 4 +// X86-NEXT: store <2 x half> [[B_COERCE:%.*]], ptr [[B]], align 2 +// X86-NEXT: store half [[A:%.*]], ptr [[A_ADDR]], align 2 +// X86-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2 +// X86-NEXT: [[EXT:%.*]] = fpext half [[TMP0]] to float +// X86-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 0 +// X86-NEXT: [[B_REAL:%.*]] = load half, ptr [[B_REALP]], align 2 +// X86-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 1 +// X86-NEXT: [[B_IMAG:%.*]] = load half, ptr [[B_IMAGP]], align 2 +// X86-NEXT: [[EXT1:%.*]] = fpext half [[B_REAL]] to float +// X86-NEXT: [[EXT2:%.*]] = fpext half [[B_IMAG]] to float +// X86-NEXT: [[CALL:%.*]] = call <2 x float> @__divsc3(float noundef [[EXT]], float noundef 0.000000e+00, float noundef [[EXT1]], float noundef [[EXT2]]) #[[ATTR2]] +// X86-NEXT: store <2 x float> [[CALL]], ptr [[COERCE]], align 4 +// X86-NEXT: [[COERCE_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[COERCE]], i32 0, i32 0 +// X86-NEXT: [[COERCE_REAL:%.*]] = load float, ptr [[COERCE_REALP]], align 4 +// X86-NEXT: [[COERCE_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[COERCE]], i32 0, i32 1 +// X86-NEXT: [[COERCE_IMAG:%.*]] = load float, ptr [[COERCE_IMAGP]], align 4 +// X86-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[COERCE_REAL]] to half +// X86-NEXT: [[UNPROMOTION3:%.*]] = fptrunc float [[COERCE_IMAG]] to half +// X86-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0 +// X86-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1 +// X86-NEXT: store half [[UNPROMOTION]], ptr 
[[RETVAL_REALP]], align 2 +// X86-NEXT: store half [[UNPROMOTION3]], ptr [[RETVAL_IMAGP]], align 2 +// X86-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2 +// X86-NEXT: ret <2 x half> [[TMP1]] +// _Float16 _Complex div_half_rc(_Float16 a, _Float16 _Complex b) { - // X86-LABEL: @div_half_rc( - // X86-NOT: fdiv - // X86: call {{.*}} @__divhc3( - // X86: ret + return a / b; } + +// AVX-LABEL: @div_half_cc( +// AVX-NEXT: entry: +// AVX-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2 +// AVX-NEXT: [[A:%.*]] = alloca { half, half }, align 2 +// AVX-NEXT: [[B:%.*]] = alloca { half, half }, align 2 +// AVX-NEXT: [[COERCE:%.*]] = alloca { half, half }, align 2 +// AVX-NEXT: store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2 +// AVX-NEXT: store <2 x half> [[B_COERCE:%.*]], ptr [[B]], align 2 +// AVX-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0 +// AVX-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2 +// AVX-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1 +// AVX-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2 +// AVX-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 0 +// AVX-NEXT: [[B_REAL:%.*]] = load half, ptr [[B_REALP]], align 2 +// AVX-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 1 +// AVX-NEXT: [[B_IMAG:%.*]] = load half, ptr [[B_IMAGP]], align 2 +// AVX-NEXT: [[CALL:%.*]] = call <2 x half> @__divhc3(half noundef [[A_REAL]], half noundef [[A_IMAG]], half noundef [[B_REAL]], half noundef [[B_IMAG]]) #[[ATTR1]] +// AVX-NEXT: store <2 x half> [[CALL]], ptr [[COERCE]], align 2 +// AVX-NEXT: [[COERCE_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[COERCE]], i32 0, i32 0 +// AVX-NEXT: [[COERCE_REAL:%.*]] = load half, ptr [[COERCE_REALP]], align 2 +// AVX-NEXT: [[COERCE_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[COERCE]], i32 0, i32 1 +// AVX-NEXT: [[COERCE_IMAG:%.*]] = load half, ptr [[COERCE_IMAGP]], align 2 +// AVX-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0 +// AVX-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1 +// AVX-NEXT: store half [[COERCE_REAL]], ptr [[RETVAL_REALP]], align 2 +// AVX-NEXT: store half [[COERCE_IMAG]], ptr [[RETVAL_IMAGP]], align 2 +// AVX-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2 +// AVX-NEXT: ret <2 x half> [[TMP0]] +// +// X86-LABEL: @div_half_cc( +// X86-NEXT: entry: +// X86-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2 +// X86-NEXT: [[A:%.*]] = alloca { half, half }, align 2 +// X86-NEXT: [[B:%.*]] = alloca { half, half }, align 2 +// X86-NEXT: [[COERCE:%.*]] = alloca { float, float }, align 4 +// X86-NEXT: store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2 +// X86-NEXT: store <2 x half> [[B_COERCE:%.*]], ptr [[B]], align 2 +// X86-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0 +// X86-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2 +// X86-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1 +// X86-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2 +// X86-NEXT: [[EXT:%.*]] = fpext half [[A_REAL]] to float +// X86-NEXT: [[EXT1:%.*]] = fpext half [[A_IMAG]] to float +// X86-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 0 +// X86-NEXT: [[B_REAL:%.*]] = load half, ptr [[B_REALP]], align 
2 +// X86-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 1 +// X86-NEXT: [[B_IMAG:%.*]] = load half, ptr [[B_IMAGP]], align 2 +// X86-NEXT: [[EXT2:%.*]] = fpext half [[B_REAL]] to float +// X86-NEXT: [[EXT3:%.*]] = fpext half [[B_IMAG]] to float +// X86-NEXT: [[CALL:%.*]] = call <2 x float> @__divsc3(float noundef [[EXT]], float noundef [[EXT1]], float noundef [[EXT2]], float noundef [[EXT3]]) #[[ATTR2]] +// X86-NEXT: store <2 x float> [[CALL]], ptr [[COERCE]], align 4 +// X86-NEXT: [[COERCE_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[COERCE]], i32 0, i32 0 +// X86-NEXT: [[COERCE_REAL:%.*]] = load float, ptr [[COERCE_REALP]], align 4 +// X86-NEXT: [[COERCE_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[COERCE]], i32 0, i32 1 +// X86-NEXT: [[COERCE_IMAG:%.*]] = load float, ptr [[COERCE_IMAGP]], align 4 +// X86-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[COERCE_REAL]] to half +// X86-NEXT: [[UNPROMOTION4:%.*]] = fptrunc float [[COERCE_IMAG]] to half +// X86-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0 +// X86-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1 +// X86-NEXT: store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2 +// X86-NEXT: store half [[UNPROMOTION4]], ptr [[RETVAL_IMAGP]], align 2 +// X86-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2 +// X86-NEXT: ret <2 x half> [[TMP0]] +// _Float16 _Complex div_half_cc(_Float16 _Complex a, _Float16 _Complex b) { - // X86-LABEL: @div_half_cc( - // X86-NOT: fdiv - // X86: call {{.*}} @__divhc3( - // X86: ret + + + return a / b; } + +// AVX-LABEL: @addcompound_half_rr( +// AVX-NEXT: entry: +// AVX-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2 +// AVX-NEXT: [[A_ADDR:%.*]] = alloca half, align 2 +// AVX-NEXT: [[C_ADDR:%.*]] = alloca half, align 2 +// AVX-NEXT: store half [[A:%.*]], ptr [[A_ADDR]], align 2 +// AVX-NEXT: store half [[C:%.*]], ptr [[C_ADDR]], align 2 +// AVX-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2 +// AVX-NEXT: [[TMP1:%.*]] = load half, ptr [[C_ADDR]], align 2 +// AVX-NEXT: [[ADD:%.*]] = fadd half [[TMP1]], [[TMP0]] +// AVX-NEXT: store half [[ADD]], ptr [[C_ADDR]], align 2 +// AVX-NEXT: [[TMP2:%.*]] = load half, ptr [[C_ADDR]], align 2 +// AVX-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0 +// AVX-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1 +// AVX-NEXT: store half [[TMP2]], ptr [[RETVAL_REALP]], align 2 +// AVX-NEXT: store half 0xH0000, ptr [[RETVAL_IMAGP]], align 2 +// AVX-NEXT: [[TMP3:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2 +// AVX-NEXT: ret <2 x half> [[TMP3]] +// +// X86-LABEL: @addcompound_half_rr( +// X86-NEXT: entry: +// X86-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2 +// X86-NEXT: [[A_ADDR:%.*]] = alloca half, align 2 +// X86-NEXT: [[C_ADDR:%.*]] = alloca half, align 2 +// X86-NEXT: store half [[A:%.*]], ptr [[A_ADDR]], align 2 +// X86-NEXT: store half [[C:%.*]], ptr [[C_ADDR]], align 2 +// X86-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2 +// X86-NEXT: [[EXT:%.*]] = fpext half [[TMP0]] to float +// X86-NEXT: [[TMP1:%.*]] = load half, ptr [[C_ADDR]], align 2 +// X86-NEXT: [[CONV:%.*]] = fpext half [[TMP1]] to float +// X86-NEXT: [[ADD:%.*]] = fadd float [[CONV]], [[EXT]] +// X86-NEXT: [[CONV1:%.*]] = fptrunc float [[ADD]] to half +// X86-NEXT: store half [[CONV1]], ptr [[C_ADDR]], 
align 2 +// X86-NEXT: [[TMP2:%.*]] = load half, ptr [[C_ADDR]], align 2 +// X86-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0 +// X86-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1 +// X86-NEXT: store half [[TMP2]], ptr [[RETVAL_REALP]], align 2 +// X86-NEXT: store half 0xH0000, ptr [[RETVAL_IMAGP]], align 2 +// X86-NEXT: [[TMP3:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2 +// X86-NEXT: ret <2 x half> [[TMP3]] +// +_Float16 _Complex addcompound_half_rr(_Float16 a, _Float16 c) { + + c += a; + return c; +} + +// AVX-LABEL: @addcompound_half_cr( +// AVX-NEXT: entry: +// AVX-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2 +// AVX-NEXT: [[A:%.*]] = alloca { half, half }, align 2 +// AVX-NEXT: [[C_ADDR:%.*]] = alloca half, align 2 +// AVX-NEXT: store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2 +// AVX-NEXT: store half [[C:%.*]], ptr [[C_ADDR]], align 2 +// AVX-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0 +// AVX-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2 +// AVX-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1 +// AVX-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2 +// AVX-NEXT: [[TMP0:%.*]] = load half, ptr [[C_ADDR]], align 2 +// AVX-NEXT: [[ADD_R:%.*]] = fadd half [[TMP0]], [[A_REAL]] +// AVX-NEXT: store half [[ADD_R]], ptr [[C_ADDR]], align 2 +// AVX-NEXT: [[TMP1:%.*]] = load half, ptr [[C_ADDR]], align 2 +// AVX-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0 +// AVX-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1 +// AVX-NEXT: store half [[TMP1]], ptr [[RETVAL_REALP]], align 2 +// AVX-NEXT: store half 0xH0000, ptr [[RETVAL_IMAGP]], align 2 +// AVX-NEXT: [[TMP2:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2 +// AVX-NEXT: ret <2 x half> [[TMP2]] +// +// X86-LABEL: @addcompound_half_cr( +// X86-NEXT: entry: +// X86-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2 +// X86-NEXT: [[A:%.*]] = alloca { half, half }, align 2 +// X86-NEXT: [[C_ADDR:%.*]] = alloca half, align 2 +// X86-NEXT: store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2 +// X86-NEXT: store half [[C:%.*]], ptr [[C_ADDR]], align 2 +// X86-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0 +// X86-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2 +// X86-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1 +// X86-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2 +// X86-NEXT: [[EXT:%.*]] = fpext half [[A_REAL]] to float +// X86-NEXT: [[EXT1:%.*]] = fpext half [[A_IMAG]] to float +// X86-NEXT: [[TMP0:%.*]] = load half, ptr [[C_ADDR]], align 2 +// X86-NEXT: [[CONV:%.*]] = fpext half [[TMP0]] to float +// X86-NEXT: [[ADD_R:%.*]] = fadd float [[CONV]], [[EXT]] +// X86-NEXT: [[CONV2:%.*]] = fptrunc float [[ADD_R]] to half +// X86-NEXT: store half [[CONV2]], ptr [[C_ADDR]], align 2 +// X86-NEXT: [[TMP1:%.*]] = load half, ptr [[C_ADDR]], align 2 +// X86-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0 +// X86-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1 +// X86-NEXT: store half [[TMP1]], ptr [[RETVAL_REALP]], align 2 +// X86-NEXT: store half 0xH0000, ptr [[RETVAL_IMAGP]], align 2 +// X86-NEXT: [[TMP2:%.*]] = load <2 x half>, ptr 
[[RETVAL]], align 2 +// X86-NEXT: ret <2 x half> [[TMP2]] +// +_Float16 _Complex addcompound_half_cr(_Float16 _Complex a, _Float16 c) { + + c += a; + return c; +} + Index: clang/test/CodeGen/X86/Float16-arithmetic.c =================================================================== --- clang/test/CodeGen/X86/Float16-arithmetic.c +++ clang/test/CodeGen/X86/Float16-arithmetic.c @@ -1,7 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s | FileCheck %s - // CHECK-LABEL: @add1( // CHECK-NEXT: entry: // CHECK-NEXT: [[A_ADDR:%.*]] = alloca half, align 2 @@ -9,9 +8,12 @@ // CHECK-NEXT: store half [[A:%.*]], ptr [[A_ADDR]], align 2 // CHECK-NEXT: store half [[B:%.*]], ptr [[B_ADDR]], align 2 // CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2 +// CHECK-NEXT: [[EXT:%.*]] = fpext half [[TMP0]] to float // CHECK-NEXT: [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2 -// CHECK-NEXT: [[ADD:%.*]] = fadd half [[TMP0]], [[TMP1]] -// CHECK-NEXT: ret half [[ADD]] +// CHECK-NEXT: [[EXT1:%.*]] = fpext half [[TMP1]] to float +// CHECK-NEXT: [[ADD:%.*]] = fadd float [[EXT]], [[EXT1]] +// CHECK-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[ADD]] to half +// CHECK-NEXT: ret half [[UNPROMOTION]] // _Float16 add1(_Float16 a, _Float16 b) { return a + b; @@ -26,11 +28,15 @@ // CHECK-NEXT: store half [[B:%.*]], ptr [[B_ADDR]], align 2 // CHECK-NEXT: store half [[C:%.*]], ptr [[C_ADDR]], align 2 // CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2 +// CHECK-NEXT: [[EXT:%.*]] = fpext half [[TMP0]] to float // CHECK-NEXT: [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2 -// CHECK-NEXT: [[ADD:%.*]] = fadd half [[TMP0]], [[TMP1]] +// CHECK-NEXT: [[EXT1:%.*]] = fpext half [[TMP1]] to float +// CHECK-NEXT: [[ADD:%.*]] = fadd float [[EXT]], [[EXT1]] // CHECK-NEXT: [[TMP2:%.*]] = load half, ptr [[C_ADDR]], align 2 -// CHECK-NEXT: [[ADD1:%.*]] = fadd half [[ADD]], [[TMP2]] -// CHECK-NEXT: ret half [[ADD1]] +// CHECK-NEXT: [[EXT2:%.*]] = fpext half [[TMP2]] to float +// CHECK-NEXT: [[ADD3:%.*]] = fadd float [[ADD]], [[EXT2]] +// CHECK-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[ADD3]] to half +// CHECK-NEXT: ret half [[UNPROMOTION]] // _Float16 add2(_Float16 a, _Float16 b, _Float16 c) { return a + b + c; @@ -43,9 +49,12 @@ // CHECK-NEXT: store half [[A:%.*]], ptr [[A_ADDR]], align 2 // CHECK-NEXT: store half [[B:%.*]], ptr [[B_ADDR]], align 2 // CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2 +// CHECK-NEXT: [[EXT:%.*]] = fpext half [[TMP0]] to float // CHECK-NEXT: [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2 -// CHECK-NEXT: [[DIV:%.*]] = fdiv half [[TMP0]], [[TMP1]] -// CHECK-NEXT: ret half [[DIV]] +// CHECK-NEXT: [[EXT1:%.*]] = fpext half [[TMP1]] to float +// CHECK-NEXT: [[DIV:%.*]] = fdiv float [[EXT]], [[EXT1]] +// CHECK-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[DIV]] to half +// CHECK-NEXT: ret half [[UNPROMOTION]] // _Float16 div(_Float16 a, _Float16 b) { return a / b; @@ -58,9 +67,12 @@ // CHECK-NEXT: store half [[A:%.*]], ptr [[A_ADDR]], align 2 // CHECK-NEXT: store half [[B:%.*]], ptr [[B_ADDR]], align 2 // CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2 +// CHECK-NEXT: [[EXT:%.*]] = fpext half [[TMP0]] to float // CHECK-NEXT: [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2 -// CHECK-NEXT: [[MUL:%.*]] = fmul half [[TMP0]], [[TMP1]] -// CHECK-NEXT: ret half [[MUL]] +// CHECK-NEXT: [[EXT1:%.*]] = fpext half [[TMP1]] to float +// CHECK-NEXT: [[MUL:%.*]] = 
fmul float [[EXT]], [[EXT1]] +// CHECK-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[MUL]] to half +// CHECK-NEXT: ret half [[UNPROMOTION]] // _Float16 mul(_Float16 a, _Float16 b) { return a * b; @@ -77,13 +89,18 @@ // CHECK-NEXT: store half [[C:%.*]], ptr [[C_ADDR]], align 2 // CHECK-NEXT: store half [[D:%.*]], ptr [[D_ADDR]], align 2 // CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2 +// CHECK-NEXT: [[EXT:%.*]] = fpext half [[TMP0]] to float // CHECK-NEXT: [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2 -// CHECK-NEXT: [[MUL:%.*]] = fmul half [[TMP0]], [[TMP1]] +// CHECK-NEXT: [[EXT1:%.*]] = fpext half [[TMP1]] to float +// CHECK-NEXT: [[MUL:%.*]] = fmul float [[EXT]], [[EXT1]] // CHECK-NEXT: [[TMP2:%.*]] = load half, ptr [[C_ADDR]], align 2 +// CHECK-NEXT: [[EXT2:%.*]] = fpext half [[TMP2]] to float // CHECK-NEXT: [[TMP3:%.*]] = load half, ptr [[D_ADDR]], align 2 -// CHECK-NEXT: [[MUL1:%.*]] = fmul half [[TMP2]], [[TMP3]] -// CHECK-NEXT: [[ADD:%.*]] = fadd half [[MUL]], [[MUL1]] -// CHECK-NEXT: ret half [[ADD]] +// CHECK-NEXT: [[EXT3:%.*]] = fpext half [[TMP3]] to float +// CHECK-NEXT: [[MUL4:%.*]] = fmul float [[EXT2]], [[EXT3]] +// CHECK-NEXT: [[ADD:%.*]] = fadd float [[MUL]], [[MUL4]] +// CHECK-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[ADD]] to half +// CHECK-NEXT: ret half [[UNPROMOTION]] // _Float16 add_and_mul1(_Float16 a, _Float16 b, _Float16 c, _Float16 d) { return a * b + c * d; @@ -100,13 +117,154 @@ // CHECK-NEXT: store half [[C:%.*]], ptr [[C_ADDR]], align 2 // CHECK-NEXT: store half [[D:%.*]], ptr [[D_ADDR]], align 2 // CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2 +// CHECK-NEXT: [[EXT:%.*]] = fpext half [[TMP0]] to float // CHECK-NEXT: [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2 -// CHECK-NEXT: [[MUL:%.*]] = fmul half 0xH4600, [[TMP1]] -// CHECK-NEXT: [[SUB:%.*]] = fsub half [[TMP0]], [[MUL]] +// CHECK-NEXT: [[EXT1:%.*]] = fpext half [[TMP1]] to float +// CHECK-NEXT: [[MUL:%.*]] = fmul float 6.000000e+00, [[EXT1]] +// CHECK-NEXT: [[SUB:%.*]] = fsub float [[EXT]], [[MUL]] +// CHECK-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[SUB]] to half +// CHECK-NEXT: [[EXT2:%.*]] = fpext half [[UNPROMOTION]] to float // CHECK-NEXT: [[TMP2:%.*]] = load half, ptr [[C_ADDR]], align 2 -// CHECK-NEXT: [[ADD:%.*]] = fadd half [[SUB]], [[TMP2]] -// CHECK-NEXT: ret half [[ADD]] +// CHECK-NEXT: [[EXT3:%.*]] = fpext half [[TMP2]] to float +// CHECK-NEXT: [[ADD:%.*]] = fadd float [[EXT2]], [[EXT3]] +// CHECK-NEXT: [[UNPROMOTION4:%.*]] = fptrunc float [[ADD]] to half +// CHECK-NEXT: ret half [[UNPROMOTION4]] // _Float16 add_and_mul2(_Float16 a, _Float16 b, _Float16 c, _Float16 d) { return (a - 6 * b) + c; } + +// CHECK-LABEL: @addcompound( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca half, align 2 +// CHECK-NEXT: [[C_ADDR:%.*]] = alloca half, align 2 +// CHECK-NEXT: store half [[A:%.*]], ptr [[A_ADDR]], align 2 +// CHECK-NEXT: store half [[C:%.*]], ptr [[C_ADDR]], align 2 +// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2 +// CHECK-NEXT: [[EXT:%.*]] = fpext half [[TMP0]] to float +// CHECK-NEXT: [[TMP1:%.*]] = load half, ptr [[C_ADDR]], align 2 +// CHECK-NEXT: [[CONV:%.*]] = fpext half [[TMP1]] to float +// CHECK-NEXT: [[ADD:%.*]] = fadd float [[CONV]], [[EXT]] +// CHECK-NEXT: [[CONV1:%.*]] = fptrunc float [[ADD]] to half +// CHECK-NEXT: store half [[CONV1]], ptr [[C_ADDR]], align 2 +// CHECK-NEXT: [[TMP2:%.*]] = load half, ptr [[C_ADDR]], align 2 +// CHECK-NEXT: ret half [[TMP2]] +// +_Float16 addcompound(_Float16 a, _Float16 
c) { + c += a; + return c; +} + +// CHECK-LABEL: @mulcompound_int_float16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[C_ADDR:%.*]] = alloca half, align 2 +// CHECK-NEXT: store i32 [[A:%.*]], ptr [[A_ADDR]], align 4 +// CHECK-NEXT: store half [[C:%.*]], ptr [[C_ADDR]], align 2 +// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[C_ADDR]], align 2 +// CHECK-NEXT: [[EXT:%.*]] = fpext half [[TMP0]] to float +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to float +// CHECK-NEXT: [[MUL:%.*]] = fmul float [[CONV]], [[EXT]] +// CHECK-NEXT: [[CONV1:%.*]] = fptosi float [[MUL]] to i32 +// CHECK-NEXT: store i32 [[CONV1]], ptr [[A_ADDR]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load half, ptr [[C_ADDR]], align 2 +// CHECK-NEXT: ret half [[TMP2]] +// +_Float16 mulcompound_int_float16(int a, _Float16 c) { + a *= c; + return c; +} + +// CHECK-LABEL: @mulcompound_float_float16c( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[C:%.*]] = alloca { half, half }, align 2 +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca float, align 4 +// CHECK-NEXT: store <2 x half> [[C_COERCE:%.*]], ptr [[C]], align 2 +// CHECK-NEXT: store float [[A:%.*]], ptr [[A_ADDR]], align 4 +// CHECK-NEXT: [[C_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 0 +// CHECK-NEXT: [[C_REAL:%.*]] = load half, ptr [[C_REALP]], align 2 +// CHECK-NEXT: [[C_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 1 +// CHECK-NEXT: [[C_IMAG:%.*]] = load half, ptr [[C_IMAGP]], align 2 +// CHECK-NEXT: [[CONV:%.*]] = fpext half [[C_REAL]] to float +// CHECK-NEXT: [[CONV1:%.*]] = fpext half [[C_IMAG]] to float +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[A_ADDR]], align 4 +// CHECK-NEXT: [[MUL_RL:%.*]] = fmul float [[TMP0]], [[CONV]] +// CHECK-NEXT: [[MUL_IR:%.*]] = fmul float [[TMP0]], [[CONV1]] +// CHECK-NEXT: store float [[MUL_RL]], ptr [[A_ADDR]], align 4 +// CHECK-NEXT: [[C_REALP2:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 0 +// CHECK-NEXT: [[C_REAL3:%.*]] = load half, ptr [[C_REALP2]], align 2 +// CHECK-NEXT: ret half [[C_REAL3]] +// +_Float16 mulcompound_float_float16c(float a, _Float16 _Complex c) { + a *= c; + return c; +} + +// CHECK-LABEL: @RealOp( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca half, align 2 +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca half, align 2 +// CHECK-NEXT: store half [[A:%.*]], ptr [[A_ADDR]], align 2 +// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2 +// CHECK-NEXT: [[EXT:%.*]] = fpext half [[TMP0]] to float +// CHECK-NEXT: store float [[EXT]], ptr [[RETVAL]], align 2 +// CHECK-NEXT: [[TMP1:%.*]] = load half, ptr [[RETVAL]], align 2 +// CHECK-NEXT: ret half [[TMP1]] +// +_Float16 RealOp(_Float16 a) { + return __real a; +} + +// CHECK-LABEL: @RealOp_c( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca half, align 2 +// CHECK-NEXT: [[A:%.*]] = alloca { half, half }, align 2 +// CHECK-NEXT: store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2 +// CHECK-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0 +// CHECK-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2 +// CHECK-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1 +// CHECK-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2 +// CHECK-NEXT: [[EXT:%.*]] = fpext half [[A_REAL]] to float +// CHECK-NEXT: [[EXT1:%.*]] = fpext half [[A_IMAG]] to float +// CHECK-NEXT: 
store float [[EXT]], ptr [[RETVAL]], align 2 +// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[RETVAL]], align 2 +// CHECK-NEXT: ret half [[TMP0]] +// +_Float16 RealOp_c(_Float16 _Complex a) { + return __real a; +} + +// CHECK-LABEL: @ImagOp( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca half, align 2 +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca half, align 2 +// CHECK-NEXT: store half [[A:%.*]], ptr [[A_ADDR]], align 2 +// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2 +// CHECK-NEXT: [[EXT:%.*]] = fpext half [[TMP0]] to float +// CHECK-NEXT: store float 0.000000e+00, ptr [[RETVAL]], align 2 +// CHECK-NEXT: [[TMP1:%.*]] = load half, ptr [[RETVAL]], align 2 +// CHECK-NEXT: ret half [[TMP1]] +// +_Float16 ImagOp(_Float16 a) { + return __imag a; +} + +// CHECK-LABEL: @ImagOp_c( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca half, align 2 +// CHECK-NEXT: [[A:%.*]] = alloca { half, half }, align 2 +// CHECK-NEXT: store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2 +// CHECK-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0 +// CHECK-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2 +// CHECK-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1 +// CHECK-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2 +// CHECK-NEXT: [[EXT:%.*]] = fpext half [[A_REAL]] to float +// CHECK-NEXT: [[EXT1:%.*]] = fpext half [[A_IMAG]] to float +// CHECK-NEXT: store float [[EXT1]], ptr [[RETVAL]], align 2 +// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[RETVAL]], align 2 +// CHECK-NEXT: ret half [[TMP0]] +// +_Float16 ImagOp_c(_Float16 _Complex a) { + return __imag a; +} Index: clang/lib/CodeGen/CodeGenFunction.h =================================================================== --- clang/lib/CodeGen/CodeGenFunction.h +++ clang/lib/CodeGen/CodeGenFunction.h @@ -4409,6 +4409,9 @@ /// EmitLoadOfComplex - Load a complex number from the specified l-value. ComplexPairTy EmitLoadOfComplex(LValue src, SourceLocation loc); + ComplexPairTy EmitPromotedComplexExpr(const Expr *E, QualType PromotionType); + llvm::Value *EmitPromotedScalarExpr(const Expr *E, QualType PromotionType); + Address emitAddrOfRealComponent(Address complex, QualType complexType); Address emitAddrOfImagComponent(Address complex, QualType complexType); Index: clang/lib/CodeGen/CGExprScalar.cpp =================================================================== --- clang/lib/CodeGen/CGExprScalar.cpp +++ clang/lib/CodeGen/CGExprScalar.cpp @@ -790,21 +790,44 @@ // Helper functions for fixed point binary operations. 
  Value *EmitFixedPointBinOp(const BinOpInfo &Ops);
 
-  BinOpInfo EmitBinOps(const BinaryOperator *E);
+  BinOpInfo EmitBinOps(const BinaryOperator *E,
+                       QualType PromotionTy = QualType());
+
+  Value *EmitPromoted(const Expr *E, QualType PromotionType);
+
   LValue EmitCompoundAssignLValue(const CompoundAssignOperator *E,
-                            Value *(ScalarExprEmitter::*F)(const BinOpInfo &),
-                            Value *&Result);
+                                  Value *(ScalarExprEmitter::*F)(const BinOpInfo &),
+                                  Value *&Result);
   Value *EmitCompoundAssign(const CompoundAssignOperator *E,
                             Value *(ScalarExprEmitter::*F)(const BinOpInfo &));
 
+  QualType getPromotionType(QualType Ty) {
+    // Only promote when the target wants excess-precision emulation; this
+    // keeps the complex and scalar cases consistent and avoids promoting on
+    // targets with a legal half type.
+    if (!CGF.getTarget().shouldEmitFloat16WithExcessPrecision())
+      return QualType();
+    if (Ty->isAnyComplexType()) {
+      QualType ElementType = Ty->castAs<ComplexType>()->getElementType();
+      if (ElementType->isFloat16Type())
+        return CGF.getContext().getComplexType(CGF.getContext().FloatTy);
+    }
+    if (Ty->isFloat16Type())
+      return CGF.getContext().FloatTy;
+    return QualType();
+  }
+
   // Binary operators and binary compound assignment operators.
-#define HANDLEBINOP(OP) \
-  Value *VisitBin ## OP(const BinaryOperator *E) { \
-    return Emit ## OP(EmitBinOps(E)); \
-  } \
-  Value *VisitBin ## OP ## Assign(const CompoundAssignOperator *E) { \
-    return EmitCompoundAssign(E, &ScalarExprEmitter::Emit ## OP); \
+#define HANDLEBINOP(OP) \
+  Value *VisitBin##OP(const BinaryOperator *E) { \
+    QualType promotionTy = getPromotionType(E->getType()); \
+    auto result = Emit##OP(EmitBinOps(E, promotionTy)); \
+    if (result && !promotionTy.isNull()) \
+      result = Builder.CreateFPTrunc(result, ConvertType(E->getType()), \
+                                     "unpromotion"); \
+    return result; \
+  } \
+  Value *VisitBin##OP##Assign(const CompoundAssignOperator *E) { \
+    return EmitCompoundAssign(E, &ScalarExprEmitter::Emit##OP); \
   }
   HANDLEBINOP(Mul)
   HANDLEBINOP(Div)
@@ -3007,31 +3030,41 @@
 
 Value *ScalarExprEmitter::VisitUnaryReal(const UnaryOperator *E) {
   Expr *Op = E->getSubExpr();
+  QualType PromotionType = getPromotionType(Op->getType());
   if (Op->getType()->isAnyComplexType()) {
     // If it's an l-value, load through the appropriate subobject l-value.
     // Note that we have to ask E because Op might be an l-value that
     // this won't work for, e.g. an Obj-C property.
-    if (E->isGLValue())
-      return CGF.EmitLoadOfLValue(CGF.EmitLValue(E),
-                                  E->getExprLoc()).getScalarVal();
-
+    if (E->isGLValue()) {
+      if (!PromotionType.isNull()) {
+        return CGF.EmitPromotedComplexExpr(Op, PromotionType).first;
+      }
+      return CGF.EmitLoadOfLValue(CGF.EmitLValue(E), E->getExprLoc())
+          .getScalarVal();
+    }
     // Otherwise, calculate and project.
     return CGF.EmitComplexExpr(Op, false, true).first;
   }
 
+  if (!PromotionType.isNull())
+    return CGF.EmitPromotedScalarExpr(Op, PromotionType);
   return Visit(Op);
 }
 
 Value *ScalarExprEmitter::VisitUnaryImag(const UnaryOperator *E) {
   Expr *Op = E->getSubExpr();
+  QualType PromotionType = getPromotionType(Op->getType());
   if (Op->getType()->isAnyComplexType()) {
     // If it's an l-value, load through the appropriate subobject l-value.
     // Note that we have to ask E because Op might be an l-value that
     // this won't work for, e.g. an Obj-C property.
-    if (Op->isGLValue())
-      return CGF.EmitLoadOfLValue(CGF.EmitLValue(E),
-                                  E->getExprLoc()).getScalarVal();
-
+    if (Op->isGLValue()) {
+      if (!PromotionType.isNull()) {
+        return CGF.EmitPromotedComplexExpr(Op, PromotionType).second;
+      }
+      return CGF.EmitLoadOfLValue(CGF.EmitLValue(E), E->getExprLoc())
+          .getScalarVal();
+    }
     // Otherwise, calculate and project.
    return CGF.EmitComplexExpr(Op, true, false).second;
   }
 
@@ -3040,8 +3073,12 @@
   // effects are evaluated, but not the actual value.
   if (Op->isGLValue())
     CGF.EmitLValue(Op);
+  else if (!PromotionType.isNull())
+    CGF.EmitPromotedScalarExpr(Op, PromotionType);
   else
     CGF.EmitScalarExpr(Op, true);
+  if (!PromotionType.isNull())
+    return llvm::Constant::getNullValue(ConvertType(PromotionType));
   return llvm::Constant::getNullValue(ConvertType(E->getType()));
 }
 
@@ -3049,12 +3086,42 @@
 // Binary Operators
 //===----------------------------------------------------------------------===//
 
-BinOpInfo ScalarExprEmitter::EmitBinOps(const BinaryOperator *E) {
+Value *ScalarExprEmitter::EmitPromoted(const Expr *E, QualType PromotionType) {
+  if (auto BO = dyn_cast<BinaryOperator>(E)) {
+    switch (BO->getOpcode()) {
+#define HANDLE_BINOP(OP) \
+  case BO_##OP: \
+    return Emit##OP(EmitBinOps(BO, PromotionType));
+      HANDLE_BINOP(Add)
+      HANDLE_BINOP(Sub)
+      HANDLE_BINOP(Mul)
+      HANDLE_BINOP(Div)
+#undef HANDLE_BINOP
+    default:
+      break;
+    }
+  }
+  // Fallback: emit the expression once, then extend the result to the
+  // promoted type. Unhandled binary opcodes fall through to here as well,
+  // so E is evaluated exactly once. Skip the extension when Visit already
+  // produced a value of the promoted type.
+  auto result = Visit(const_cast<Expr *>(E));
+  if (result && result->getType() != ConvertType(PromotionType))
+    result = CGF.Builder.CreateFPExt(result, ConvertType(PromotionType), "ext");
+  return result;
+}
+
+BinOpInfo ScalarExprEmitter::EmitBinOps(const BinaryOperator *E,
+                                        QualType PromotionType) {
   TestAndClearIgnoreResultAssign();
   BinOpInfo Result;
-  Result.LHS = Visit(E->getLHS());
-  Result.RHS = Visit(E->getRHS());
-  Result.Ty  = E->getType();
+  Result.LHS = CGF.EmitPromotedScalarExpr(E->getLHS(), PromotionType);
+  Result.RHS = CGF.EmitPromotedScalarExpr(E->getRHS(), PromotionType);
+  if (!PromotionType.isNull())
+    Result.Ty = PromotionType;
+  else
+    Result.Ty = E->getType();
   Result.Opcode = E->getOpcode();
   Result.FPFeatures = E->getFPFeaturesInEffect(CGF.getLangOpts());
   Result.E = E;
@@ -3062,9 +3129,8 @@
 }
 
 LValue ScalarExprEmitter::EmitCompoundAssignLValue(
-                                          const CompoundAssignOperator *E,
-                        Value *(ScalarExprEmitter::*Func)(const BinOpInfo &),
-                                                   Value *&Result) {
+    const CompoundAssignOperator *E,
+    Value *(ScalarExprEmitter::*Func)(const BinOpInfo &), Value *&Result) {
   QualType LHSTy = E->getLHS()->getType();
   BinOpInfo OpInfo;
 
@@ -3073,8 +3139,18 @@
 
   // Emit the RHS first.  __block variables need to have the rhs evaluated
   // first, plus this should improve codegen a little.
- OpInfo.RHS = Visit(E->getRHS()); - OpInfo.Ty = E->getComputationResultType(); + + QualType PromotionTypeCR; + PromotionTypeCR = getPromotionType(E->getComputationResultType()); + if (PromotionTypeCR.isNull()) + PromotionTypeCR = E->getComputationResultType(); + QualType PromotionTypeLHS = getPromotionType(E->getComputationLHSType()); + QualType PromotionTypeRHS = getPromotionType(E->getRHS()->getType()); + if (!PromotionTypeRHS.isNull()) + OpInfo.RHS = CGF.EmitPromotedScalarExpr(E->getRHS(), PromotionTypeRHS); + else + OpInfo.RHS = Visit(E->getRHS()); + OpInfo.Ty = PromotionTypeCR; OpInfo.Opcode = E->getOpcode(); OpInfo.FPFeatures = E->getFPFeaturesInEffect(CGF.getLangOpts()); OpInfo.E = E; @@ -3153,16 +3229,20 @@ CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, OpInfo.FPFeatures); SourceLocation Loc = E->getExprLoc(); - OpInfo.LHS = - EmitScalarConversion(OpInfo.LHS, LHSTy, E->getComputationLHSType(), Loc); + if (!PromotionTypeLHS.isNull()) + OpInfo.LHS = EmitScalarConversion(OpInfo.LHS, LHSTy, PromotionTypeLHS, + E->getExprLoc()); + else + OpInfo.LHS = EmitScalarConversion(OpInfo.LHS, LHSTy, + E->getComputationLHSType(), Loc); // Expand the binary operator. Result = (this->*Func)(OpInfo); // Convert the result back to the LHS type, // potentially with Implicit Conversion sanitizer check. - Result = EmitScalarConversion(Result, E->getComputationResultType(), LHSTy, - Loc, ScalarConversionOpts(CGF.SanOpts)); + Result = EmitScalarConversion(Result, PromotionTypeCR, LHSTy, Loc, + ScalarConversionOpts(CGF.SanOpts)); if (atomicPHI) { llvm::BasicBlock *curBlock = Builder.GetInsertBlock(); @@ -4895,6 +4975,15 @@ .EmitComplexToScalarConversion(Src, SrcTy, DstTy, Loc); } +Value * +CodeGenFunction::EmitPromotedScalarExpr(const Expr *E, + QualType PromotionType) { + if (!PromotionType.isNull()) + return ScalarExprEmitter(*this).EmitPromoted(E, PromotionType); + else + return ScalarExprEmitter(*this).Visit(const_cast<Expr *>(E)); +} + llvm::Value *CodeGenFunction:: EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, Index: clang/lib/CodeGen/CGExprComplex.cpp =================================================================== --- clang/lib/CodeGen/CGExprComplex.cpp +++ clang/lib/CodeGen/CGExprComplex.cpp @@ -253,7 +253,10 @@ QualType Ty; // Computation Type. 
 };
 
-  BinOpInfo EmitBinOps(const BinaryOperator *E);
+  BinOpInfo EmitBinOps(const BinaryOperator *E,
+                       QualType PromotionTy = QualType());
+  ComplexPairTy EmitPromoted(const Expr *E, QualType PromotionTy);
+  ComplexPairTy EmitPromotedComplexOperand(const Expr *E, QualType PromotionTy);
   LValue EmitCompoundAssignLValue(const CompoundAssignOperator *E,
                                   ComplexPairTy (ComplexExprEmitter::*Func)
                                   (const BinOpInfo &),
@@ -270,18 +273,45 @@
   ComplexPairTy EmitComplexBinOpLibCall(StringRef LibCallName,
                                         const BinOpInfo &Op);
 
-  ComplexPairTy VisitBinAdd(const BinaryOperator *E) {
-    return EmitBinAdd(EmitBinOps(E));
-  }
-  ComplexPairTy VisitBinSub(const BinaryOperator *E) {
-    return EmitBinSub(EmitBinOps(E));
-  }
-  ComplexPairTy VisitBinMul(const BinaryOperator *E) {
-    return EmitBinMul(EmitBinOps(E));
-  }
-  ComplexPairTy VisitBinDiv(const BinaryOperator *E) {
-    return EmitBinDiv(EmitBinOps(E));
-  }
+  QualType getPromotionType(QualType Ty) {
+    QualType PromotedTy;
+    if (CGF.getTarget().shouldEmitFloat16WithExcessPrecision()) {
+      if (Ty->isRealFloatingType()) {
+        // Check through isFloat16Type() rather than dereferencing a
+        // dyn_cast<BuiltinType> result, which is null for sugared types.
+        if (Ty->isFloat16Type())
+          PromotedTy = CGF.getContext().FloatTy;
+      } else {
+        assert(Ty->isAnyComplexType() && "Expecting to promote a complex type!");
+        QualType ElementType = Ty->castAs<ComplexType>()->getElementType();
+        if (ElementType->isFloat16Type())
+          PromotedTy = CGF.getContext().getComplexType(CGF.getContext().FloatTy);
+      }
+    }
+    return PromotedTy;
+  }
+
+#define HANDLEBINOP(OP) \
+  ComplexPairTy VisitBin##OP(const BinaryOperator *E) { \
+    QualType promotionTy = getPromotionType(E->getType()); \
+    ComplexPairTy result = EmitBin##OP(EmitBinOps(E, promotionTy)); \
+    if (!promotionTy.isNull()) { \
+      llvm::Type *ComplexElementTy = CGF.ConvertType( \
+          E->getType()->castAs<ComplexType>()->getElementType()); \
+      if (result.first) \
+        result.first = Builder.CreateFPTrunc(result.first, ComplexElementTy, \
+                                             "unpromotion"); \
+      if (result.second) \
+        result.second = Builder.CreateFPTrunc(result.second, ComplexElementTy, \
+                                              "unpromotion"); \
+    } \
+    return result; \
+  }
+
+  HANDLEBINOP(Mul)
+  HANDLEBINOP(Div)
+  HANDLEBINOP(Add)
+  HANDLEBINOP(Sub)
+#undef HANDLEBINOP
 
   ComplexPairTy VisitCXXRewrittenBinaryOperator(CXXRewrittenBinaryOperator *E) {
     return Visit(E->getSemanticForm());
@@ -876,25 +906,81 @@
   return ComplexPairTy(DSTr, DSTi);
 }
 
+ComplexPairTy ComplexExprEmitter::EmitPromoted(const Expr *E,
+                                               QualType PromotionType) {
+  if (auto BO = dyn_cast<BinaryOperator>(E)) {
+    switch (BO->getOpcode()) {
+#define HANDLE_BINOP(OP) \
+  case BO_##OP: \
+    return EmitBin##OP(EmitBinOps(BO, PromotionType));
+      HANDLE_BINOP(Add)
+      HANDLE_BINOP(Sub)
+      HANDLE_BINOP(Mul)
+      HANDLE_BINOP(Div)
+#undef HANDLE_BINOP
+    default:
+      break;
+    }
+  }
+  // Fallback: emit the operand once and, when a promotion type is given,
+  // extend both components to its element type. Unhandled binary opcodes
+  // fall through to here too, so E is always evaluated exactly once.
+  auto result = Visit(const_cast<Expr *>(E));
+  if (PromotionType.isNull())
+    return result;
+  llvm::Type *ComplexElementTy =
+      CGF.ConvertType(PromotionType->castAs<ComplexType>()->getElementType());
+  llvm::Value *Resultr = nullptr;
+  llvm::Value *Resulti = nullptr;
+  if (result.first)
+    Resultr = CGF.Builder.CreateFPExt(result.first, ComplexElementTy, "ext");
+  if (result.second)
+    Resulti = CGF.Builder.CreateFPExt(result.second, ComplexElementTy, "ext");
+  return ComplexPairTy(Resultr, Resulti);
+}
+
+ComplexPairTy CodeGenFunction::EmitPromotedComplexExpr(const Expr *E,
+                                                       QualType DstTy) {
+  return ComplexExprEmitter(*this).EmitPromoted(E,
DstTy); +} + +ComplexPairTy +ComplexExprEmitter::EmitPromotedComplexOperand(const Expr *E, + QualType PromotionType) { + if (E->getType()->isAnyComplexType()) { + if (!PromotionType.isNull()) + return ComplexPairTy(CGF.EmitPromotedComplexExpr(E, PromotionType)); + else + return Visit(const_cast<Expr *>(E)); + } else { + if (!PromotionType.isNull()) { + QualType ComplexElementTy = + PromotionType->castAs<ComplexType>()->getElementType(); + return ComplexPairTy(CGF.EmitPromotedScalarExpr(E, ComplexElementTy), + nullptr); + } else { + return ComplexPairTy(CGF.EmitScalarExpr(E), nullptr); + } + } +} + ComplexExprEmitter::BinOpInfo -ComplexExprEmitter::EmitBinOps(const BinaryOperator *E) { +ComplexExprEmitter::EmitBinOps(const BinaryOperator *E, + QualType PromotionType) { TestAndClearIgnoreReal(); TestAndClearIgnoreImag(); BinOpInfo Ops; - if (E->getLHS()->getType()->isRealFloatingType()) - Ops.LHS = ComplexPairTy(CGF.EmitScalarExpr(E->getLHS()), nullptr); - else - Ops.LHS = Visit(E->getLHS()); - if (E->getRHS()->getType()->isRealFloatingType()) - Ops.RHS = ComplexPairTy(CGF.EmitScalarExpr(E->getRHS()), nullptr); - else - Ops.RHS = Visit(E->getRHS()); - Ops.Ty = E->getType(); + Ops.LHS = EmitPromotedComplexOperand(E->getLHS(), PromotionType); + Ops.RHS = EmitPromotedComplexOperand(E->getRHS(), PromotionType); + if (!PromotionType.isNull()) + Ops.Ty = PromotionType; + else + Ops.Ty = E->getType(); return Ops; } - LValue ComplexExprEmitter:: EmitCompoundAssignLValue(const CompoundAssignOperator *E, ComplexPairTy (ComplexExprEmitter::*Func)(const BinOpInfo&), @@ -911,19 +997,35 @@ // Load the RHS and LHS operands. // __block variables need to have the rhs evaluated first, plus this should // improve codegen a little. - OpInfo.Ty = E->getComputationResultType(); + QualType PromotionTypeCR; + PromotionTypeCR = getPromotionType(E->getComputationResultType()); + if (PromotionTypeCR.isNull()) + PromotionTypeCR = E->getComputationResultType(); + OpInfo.Ty = PromotionTypeCR; QualType ComplexElementTy = cast<ComplexType>(OpInfo.Ty)->getElementType(); + QualType PromotionTypeRHS = getPromotionType(E->getRHS()->getType()); // The RHS should have been converted to the computation type. if (E->getRHS()->getType()->isRealFloatingType()) { - assert( - CGF.getContext() - .hasSameUnqualifiedType(ComplexElementTy, E->getRHS()->getType())); - OpInfo.RHS = ComplexPairTy(CGF.EmitScalarExpr(E->getRHS()), nullptr); + if (!PromotionTypeRHS.isNull()) + OpInfo.RHS = ComplexPairTy( + CGF.EmitPromotedScalarExpr(E->getRHS(), PromotionTypeRHS), + nullptr); + else { + assert(CGF.getContext().hasSameUnqualifiedType(ComplexElementTy, + E->getRHS()->getType())); + + OpInfo.RHS = ComplexPairTy(CGF.EmitScalarExpr(E->getRHS()), nullptr); + } } else { - assert(CGF.getContext() - .hasSameUnqualifiedType(OpInfo.Ty, E->getRHS()->getType())); - OpInfo.RHS = Visit(E->getRHS()); + if (!PromotionTypeRHS.isNull()) { + OpInfo.RHS = ComplexPairTy( + CGF.EmitPromotedComplexExpr(E->getRHS(), PromotionTypeRHS)); + } else { + assert(CGF.getContext().hasSameUnqualifiedType(OpInfo.Ty, + E->getRHS()->getType())); + OpInfo.RHS = Visit(E->getRHS()); + } } LValue LHS = CGF.EmitLValue(E->getLHS()); @@ -938,9 +1040,14 @@ // For floating point real operands we can directly pass the scalar form // to the binary operator emission and potentially get more efficient code. 
   if (LHSTy->isRealFloatingType()) {
     if (!CGF.getContext().hasSameUnqualifiedType(ComplexElementTy, LHSTy))
       LHSVal = CGF.EmitScalarConversion(LHSVal, LHSTy, ComplexElementTy, Loc);
+    // OpInfo.Ty is already the promoted computation type when promotion is
+    // in effect, so the conversion above has widened LHSVal as needed and no
+    // separate promotion step is required here.
     OpInfo.LHS = ComplexPairTy(LHSVal, nullptr);
   } else {
     OpInfo.LHS = EmitScalarToComplexCast(LHSVal, LHSTy, OpInfo.Ty, Loc);
   }
Index: clang/lib/Basic/Targets/X86.h
===================================================================
--- clang/lib/Basic/Targets/X86.h
+++ clang/lib/Basic/Targets/X86.h
@@ -288,6 +288,10 @@
     return false;
   }
 
+  bool shouldEmitFloat16WithExcessPrecision() const override {
+    return HasFloat16 && !hasLegalHalfType();
+  }
+
   void getTargetDefines(const LangOptions &Opts,
                         MacroBuilder &Builder) const override;
 
Index: clang/lib/Basic/Targets/X86.cpp
===================================================================
--- clang/lib/Basic/Targets/X86.cpp
+++ clang/lib/Basic/Targets/X86.cpp
@@ -239,6 +239,7 @@
       HasAVX512ER = true;
     } else if (Feature == "+avx512fp16") {
       HasAVX512FP16 = true;
+      HasLegalHalfType = true;
     } else if (Feature == "+avx512pf") {
       HasAVX512PF = true;
     } else if (Feature == "+avx512dq") {
Index: clang/include/clang/Basic/TargetInfo.h
===================================================================
--- clang/include/clang/Basic/TargetInfo.h
+++ clang/include/clang/Basic/TargetInfo.h
@@ -912,6 +912,8 @@
     return true;
   }
 
+  virtual bool shouldEmitFloat16WithExcessPrecision() const { return false; }
+
   /// Specify if mangling based on address space map should be used or
   /// not for language specific address spaces
   bool useAddressSpaceMapMangling() const {
Index: clang/docs/LanguageExtensions.rst
===================================================================
--- clang/docs/LanguageExtensions.rst
+++ clang/docs/LanguageExtensions.rst
@@ -749,7 +749,11 @@
 includes all 64-bit and all recent 32-bit processors. When the target supports
 AVX512-FP16, ``_Float16`` arithmetic is performed using that native support.
 Otherwise, ``_Float16`` arithmetic is performed by promoting to ``float``,
-performing the operation, and then truncating to ``_Float16``.
+performing the operation, and then truncating to ``_Float16``. When doing this
+emulation, Clang defaults to following the C standard's rules for excess
+precision arithmetic, which avoid intermediate truncations within statements
+and may produce results that differ from a strict operation-by-operation
+emulation.
 
 ``_Float16`` will be supported on more targets as they define ABIs for it.
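To illustrate the behavior documented above, here is a minimal C sketch (not part of the patch; the function names are made up), assuming an x86-64 target compiled without +avx512fp16 so the emulation path is taken:

// sum3 is emitted as an fpext of each operand to float, two float fadds, and
// a single fptrunc back to half: the intermediate a + b is never rounded to
// _Float16. A strict per-operation emulation would truncate after each add
// and can round differently.
_Float16 sum3(_Float16 a, _Float16 b, _Float16 c) {
  return a + b + c;
}

// Compound assignment follows the same pattern: both operands are extended,
// the fadd happens in float, and the result is truncated back to half when
// it is stored into c.
_Float16 acc(_Float16 a, _Float16 c) {
  c += a;
  return c;
}

For _Complex _Float16 the promotion applies componentwise; division, for example, is lowered to a call to __divsc3 on the promoted float values, as the div_half_cc checks above show.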