zahiraam updated this revision to Diff 451261.
zahiraam marked 7 inline comments as done.

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D113107/new/

https://reviews.llvm.org/D113107

Files:
  clang/docs/LanguageExtensions.rst
  clang/include/clang/Basic/TargetInfo.h
  clang/lib/Basic/Targets/X86.cpp
  clang/lib/Basic/Targets/X86.h
  clang/lib/CodeGen/CGExprComplex.cpp
  clang/lib/CodeGen/CGExprScalar.cpp
  clang/lib/CodeGen/CodeGenFunction.h
  clang/test/CodeGen/X86/Float16-arithmetic.c
  clang/test/CodeGen/X86/Float16-complex.c
  clang/test/Sema/Float16.c
  clang/test/SemaCXX/Float16.cpp

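Note on the regenerated checks (illustrative, not part of the patch): the AVX prefix covers targets with avx512fp16, where _Float16 arithmetic stays in half; the X86 prefix covers plain x86-64, where each operation is promoted to float and truncated back (the fpext/fptrunc pairs in the checks), and complex multiply/divide fall back to the __mulsc3/__divsc3 libcalls instead of __mulhc3/__divhc3. A minimal C sketch of the promoted lowering, using a hypothetical helper name:

  _Float16 add_half_promoted(_Float16 a, _Float16 b) {
    float pa = (float)a;     /* fpext half -> float        */
    float pb = (float)b;     /* fpext half -> float        */
    float sum = pa + pb;     /* fadd float (stays in float) */
    return (_Float16)sum;    /* fptrunc float -> half      */
  }

This corresponds to the X86-prefixed IR for add_half_rr below; the AVX-prefixed IR performs the same addition directly as fadd half.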
Index: clang/test/SemaCXX/Float16.cpp
===================================================================
--- clang/test/SemaCXX/Float16.cpp
+++ clang/test/SemaCXX/Float16.cpp
@@ -1,20 +1,10 @@
-// RUN: %clang_cc1 -fsyntax-only -verify -triple i686-linux-pc %s
-// RUN: %clang_cc1 -fsyntax-only -verify -triple i686-linux-pc -target-feature +sse2 %s -DHAVE
-// RUN: %clang_cc1 -fsyntax-only -verify -triple x86_64-linux-pc %s -DHAVE
-// RUN: %clang_cc1 -fsyntax-only -verify -triple spir-unknown-unknown %s -DHAVE
-// RUN: %clang_cc1 -fsyntax-only -verify -triple armv7a-linux-gnu %s -DHAVE
-// RUN: %clang_cc1 -fsyntax-only -verify -triple aarch64-linux-gnu %s -DHAVE
+// RUN: %clang_cc1 -fsyntax-only -verify -triple x86_64-linux-pc %s
+// RUN: %clang_cc1 -fsyntax-only -verify -triple spir-unknown-unknown %s
+// RUN: %clang_cc1 -fsyntax-only -verify -triple armv7a-linux-gnu %s
+// RUN: %clang_cc1 -fsyntax-only -verify -triple aarch64-linux-gnu %s
 
-#ifdef HAVE
 // expected-no-diagnostics
-#endif // HAVE
 
-#ifndef HAVE
-// expected-error@+2{{_Float16 is not supported on this target}}
-#endif // !HAVE
 _Float16 f;
 
-#ifndef HAVE
-// expected-error@+2{{invalid suffix 'F16' on floating constant}}
-#endif // !HAVE
 const auto g = 1.1F16;
Index: clang/test/Sema/Float16.c
===================================================================
--- clang/test/Sema/Float16.c
+++ clang/test/Sema/Float16.c
@@ -1,19 +1,11 @@
-// RUN: %clang_cc1 -fsyntax-only -verify -triple i686-linux-pc %s
-// RUN: %clang_cc1 -fsyntax-only -verify -triple i686-linux-pc -target-feature +sse2 %s -DHAVE
-// RUN: %clang_cc1 -fsyntax-only -verify -triple x86_64-linux-pc %s -DHAVE
-// RUN: %clang_cc1 -fsyntax-only -verify -triple spir-unknown-unknown %s -DHAVE
-// RUN: %clang_cc1 -fsyntax-only -verify -triple armv7a-linux-gnu %s -DHAVE
-// RUN: %clang_cc1 -fsyntax-only -verify -triple aarch64-linux-gnu %s -DHAVE
+// RUN: %clang_cc1 -fsyntax-only -verify -triple x86_64-linux-pc %s
+// RUN: %clang_cc1 -fsyntax-only -verify -triple x86_64-linux-pc -target-feature +avx512fp16 %s
+// RUN: %clang_cc1 -fsyntax-only -verify -triple spir-unknown-unknown %s
+// RUN: %clang_cc1 -fsyntax-only -verify -triple armv7a-linux-gnu %s
+// RUN: %clang_cc1 -fsyntax-only -verify -triple aarch64-linux-gnu %s
 
-#ifndef HAVE
-// expected-error@+2{{_Float16 is not supported on this target}}
-#endif // HAVE
-_Float16 f;
-
-#ifdef HAVE
 _Complex _Float16 a;
 void builtin_complex(void) {
   _Float16 a = 0;
   (void)__builtin_complex(a, a); // expected-error {{'_Complex _Float16' is invalid}}
 }
-#endif
Index: clang/test/CodeGen/X86/Float16-complex.c
===================================================================
--- clang/test/CodeGen/X86/Float16-complex.c
+++ clang/test/CodeGen/X86/Float16-complex.c
@@ -1,134 +1,966 @@
-// RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-unknown-unknown -target-feature +avx512fp16 -o - | FileCheck %s --check-prefix=X86
-// RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-unknown-unknown -o - | FileCheck %s --check-prefix=X86
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-unknown-unknown -target-feature +avx512fp16 -o - | FileCheck %s --check-prefixes=AVX
+// RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-unknown-unknown -o - | FileCheck %s --check-prefixes=X86
 
+// AVX-LABEL: @add_half_rr(
+// AVX-NEXT:  entry:
+// AVX-NEXT:    [[RETVAL:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT:    [[A_ADDR:%.*]] = alloca half, align 2
+// AVX-NEXT:    [[B_ADDR:%.*]] = alloca half, align 2
+// AVX-NEXT:    store half [[A:%.*]], ptr [[A_ADDR]], align 2
+// AVX-NEXT:    store half [[B:%.*]], ptr [[B_ADDR]], align 2
+// AVX-NEXT:    [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// AVX-NEXT:    [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2
+// AVX-NEXT:    [[ADD:%.*]] = fadd half [[TMP0]], [[TMP1]]
+// AVX-NEXT:    [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// AVX-NEXT:    [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// AVX-NEXT:    store half [[ADD]], ptr [[RETVAL_REALP]], align 2
+// AVX-NEXT:    store half 0xH0000, ptr [[RETVAL_IMAGP]], align 2
+// AVX-NEXT:    [[TMP2:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// AVX-NEXT:    ret <2 x half> [[TMP2]]
+//
+// X86-LABEL: @add_half_rr(
+// X86-NEXT:  entry:
+// X86-NEXT:    [[RETVAL:%.*]] = alloca { half, half }, align 2
+// X86-NEXT:    [[A_ADDR:%.*]] = alloca half, align 2
+// X86-NEXT:    [[B_ADDR:%.*]] = alloca half, align 2
+// X86-NEXT:    store half [[A:%.*]], ptr [[A_ADDR]], align 2
+// X86-NEXT:    store half [[B:%.*]], ptr [[B_ADDR]], align 2
+// X86-NEXT:    [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// X86-NEXT:    [[EXT:%.*]] = fpext half [[TMP0]] to float
+// X86-NEXT:    [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2
+// X86-NEXT:    [[EXT1:%.*]] = fpext half [[TMP1]] to float
+// X86-NEXT:    [[ADD:%.*]] = fadd float [[EXT]], [[EXT1]]
+// X86-NEXT:    [[UNPROMOTION:%.*]] = fptrunc float [[ADD]] to half
+// X86-NEXT:    [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// X86-NEXT:    [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// X86-NEXT:    store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2
+// X86-NEXT:    store half 0xH0000, ptr [[RETVAL_IMAGP]], align 2
+// X86-NEXT:    [[TMP2:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// X86-NEXT:    ret <2 x half> [[TMP2]]
+//
 _Float16 _Complex add_half_rr(_Float16 a, _Float16 b) {
-  // X86-LABEL: @add_half_rr(
-  // X86: fadd
-  // X86-NOT: fadd
-  // X86: ret
   return a + b;
 }
+
+// AVX-LABEL: @add_half_cr(
+// AVX-NEXT:  entry:
+// AVX-NEXT:    [[RETVAL:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT:    [[A:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT:    [[B_ADDR:%.*]] = alloca half, align 2
+// AVX-NEXT:    store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2
+// AVX-NEXT:    store half [[B:%.*]], ptr [[B_ADDR]], align 2
+// AVX-NEXT:    [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0
+// AVX-NEXT:    [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2
+// AVX-NEXT:    [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1
+// AVX-NEXT:    [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2
+// AVX-NEXT:    [[TMP0:%.*]] = load half, ptr [[B_ADDR]], align 2
+// AVX-NEXT:    [[ADD_R:%.*]] = fadd half [[A_REAL]], [[TMP0]]
+// AVX-NEXT:    [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// AVX-NEXT:    [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// AVX-NEXT:    store half [[ADD_R]], ptr [[RETVAL_REALP]], align 2
+// AVX-NEXT:    store half [[A_IMAG]], ptr [[RETVAL_IMAGP]], align 2
+// AVX-NEXT:    [[TMP1:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// AVX-NEXT:    ret <2 x half> [[TMP1]]
+//
+// X86-LABEL: @add_half_cr(
+// X86-NEXT:  entry:
+// X86-NEXT:    [[RETVAL:%.*]] = alloca { half, half }, align 2
+// X86-NEXT:    [[A:%.*]] = alloca { half, half }, align 2
+// X86-NEXT:    [[B_ADDR:%.*]] = alloca half, align 2
+// X86-NEXT:    store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2
+// X86-NEXT:    store half [[B:%.*]], ptr [[B_ADDR]], align 2
+// X86-NEXT:    [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0
+// X86-NEXT:    [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2
+// X86-NEXT:    [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1
+// X86-NEXT:    [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2
+// X86-NEXT:    [[EXT:%.*]] = fpext half [[A_REAL]] to float
+// X86-NEXT:    [[EXT1:%.*]] = fpext half [[A_IMAG]] to float
+// X86-NEXT:    [[TMP0:%.*]] = load half, ptr [[B_ADDR]], align 2
+// X86-NEXT:    [[EXT2:%.*]] = fpext half [[TMP0]] to float
+// X86-NEXT:    [[ADD_R:%.*]] = fadd float [[EXT]], [[EXT2]]
+// X86-NEXT:    [[UNPROMOTION:%.*]] = fptrunc float [[ADD_R]] to half
+// X86-NEXT:    [[UNPROMOTION3:%.*]] = fptrunc float [[EXT1]] to half
+// X86-NEXT:    [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// X86-NEXT:    [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// X86-NEXT:    store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2
+// X86-NEXT:    store half [[UNPROMOTION3]], ptr [[RETVAL_IMAGP]], align 2
+// X86-NEXT:    [[TMP1:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// X86-NEXT:    ret <2 x half> [[TMP1]]
+//
 _Float16 _Complex add_half_cr(_Float16 _Complex a, _Float16 b) {
-  // X86-LABEL: @add_half_cr(
-  // X86: fadd
-  // X86-NOT: fadd
-  // X86: ret
   return a + b;
 }
+
+// AVX-LABEL: @add_half_rc(
+// AVX-NEXT:  entry:
+// AVX-NEXT:    [[RETVAL:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT:    [[B:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT:    [[A_ADDR:%.*]] = alloca half, align 2
+// AVX-NEXT:    store <2 x half> [[B_COERCE:%.*]], ptr [[B]], align 2
+// AVX-NEXT:    store half [[A:%.*]], ptr [[A_ADDR]], align 2
+// AVX-NEXT:    [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// AVX-NEXT:    [[B_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 0
+// AVX-NEXT:    [[B_REAL:%.*]] = load half, ptr [[B_REALP]], align 2
+// AVX-NEXT:    [[B_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 1
+// AVX-NEXT:    [[B_IMAG:%.*]] = load half, ptr [[B_IMAGP]], align 2
+// AVX-NEXT:    [[ADD_R:%.*]] = fadd half [[TMP0]], [[B_REAL]]
+// AVX-NEXT:    [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// AVX-NEXT:    [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// AVX-NEXT:    store half [[ADD_R]], ptr [[RETVAL_REALP]], align 2
+// AVX-NEXT:    store half [[B_IMAG]], ptr [[RETVAL_IMAGP]], align 2
+// AVX-NEXT:    [[TMP1:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// AVX-NEXT:    ret <2 x half> [[TMP1]]
+//
+// X86-LABEL: @add_half_rc(
+// X86-NEXT:  entry:
+// X86-NEXT:    [[RETVAL:%.*]] = alloca { half, half }, align 2
+// X86-NEXT:    [[B:%.*]] = alloca { half, half }, align 2
+// X86-NEXT:    [[A_ADDR:%.*]] = alloca half, align 2
+// X86-NEXT:    store <2 x half> [[B_COERCE:%.*]], ptr [[B]], align 2
+// X86-NEXT:    store half [[A:%.*]], ptr [[A_ADDR]], align 2
+// X86-NEXT:    [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// X86-NEXT:    [[EXT:%.*]] = fpext half [[TMP0]] to float
+// X86-NEXT:    [[B_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 0
+// X86-NEXT:    [[B_REAL:%.*]] = load half, ptr [[B_REALP]], align 2
+// X86-NEXT:    [[B_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 1
+// X86-NEXT:    [[B_IMAG:%.*]] = load half, ptr [[B_IMAGP]], align 2
+// X86-NEXT:    [[EXT1:%.*]] = fpext half [[B_REAL]] to float
+// X86-NEXT:    [[EXT2:%.*]] = fpext half [[B_IMAG]] to float
+// X86-NEXT:    [[ADD_R:%.*]] = fadd float [[EXT]], [[EXT1]]
+// X86-NEXT:    [[UNPROMOTION:%.*]] = fptrunc float [[ADD_R]] to half
+// X86-NEXT:    [[UNPROMOTION3:%.*]] = fptrunc float [[EXT2]] to half
+// X86-NEXT:    [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// X86-NEXT:    [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// X86-NEXT:    store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2
+// X86-NEXT:    store half [[UNPROMOTION3]], ptr [[RETVAL_IMAGP]], align 2
+// X86-NEXT:    [[TMP1:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// X86-NEXT:    ret <2 x half> [[TMP1]]
+//
 _Float16 _Complex add_half_rc(_Float16 a, _Float16 _Complex b) {
-  // X86-LABEL: @add_half_rc(
-  // X86: fadd
-  // X86-NOT: fadd
-  // X86: ret
   return a + b;
 }
+
+// AVX-LABEL: @add_half_cc(
+// AVX-NEXT:  entry:
+// AVX-NEXT:    [[RETVAL:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT:    [[A:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT:    [[B:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT:    store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2
+// AVX-NEXT:    store <2 x half> [[B_COERCE:%.*]], ptr [[B]], align 2
+// AVX-NEXT:    [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0
+// AVX-NEXT:    [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2
+// AVX-NEXT:    [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1
+// AVX-NEXT:    [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2
+// AVX-NEXT:    [[B_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 0
+// AVX-NEXT:    [[B_REAL:%.*]] = load half, ptr [[B_REALP]], align 2
+// AVX-NEXT:    [[B_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 1
+// AVX-NEXT:    [[B_IMAG:%.*]] = load half, ptr [[B_IMAGP]], align 2
+// AVX-NEXT:    [[ADD_R:%.*]] = fadd half [[A_REAL]], [[B_REAL]]
+// AVX-NEXT:    [[ADD_I:%.*]] = fadd half [[A_IMAG]], [[B_IMAG]]
+// AVX-NEXT:    [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// AVX-NEXT:    [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// AVX-NEXT:    store half [[ADD_R]], ptr [[RETVAL_REALP]], align 2
+// AVX-NEXT:    store half [[ADD_I]], ptr [[RETVAL_IMAGP]], align 2
+// AVX-NEXT:    [[TMP0:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// AVX-NEXT:    ret <2 x half> [[TMP0]]
+//
+// X86-LABEL: @add_half_cc(
+// X86-NEXT:  entry:
+// X86-NEXT:    [[RETVAL:%.*]] = alloca { half, half }, align 2
+// X86-NEXT:    [[A:%.*]] = alloca { half, half }, align 2
+// X86-NEXT:    [[B:%.*]] = alloca { half, half }, align 2
+// X86-NEXT:    store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2
+// X86-NEXT:    store <2 x half> [[B_COERCE:%.*]], ptr [[B]], align 2
+// X86-NEXT:    [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0
+// X86-NEXT:    [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2
+// X86-NEXT:    [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1
+// X86-NEXT:    [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2
+// X86-NEXT:    [[EXT:%.*]] = fpext half [[A_REAL]] to float
+// X86-NEXT:    [[EXT1:%.*]] = fpext half [[A_IMAG]] to float
+// X86-NEXT:    [[B_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 0
+// X86-NEXT:    [[B_REAL:%.*]] = load half, ptr [[B_REALP]], align 2
+// X86-NEXT:    [[B_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 1
+// X86-NEXT:    [[B_IMAG:%.*]] = load half, ptr [[B_IMAGP]], align 2
+// X86-NEXT:    [[EXT2:%.*]] = fpext half [[B_REAL]] to float
+// X86-NEXT:    [[EXT3:%.*]] = fpext half [[B_IMAG]] to float
+// X86-NEXT:    [[ADD_R:%.*]] = fadd float [[EXT]], [[EXT2]]
+// X86-NEXT:    [[ADD_I:%.*]] = fadd float [[EXT1]], [[EXT3]]
+// X86-NEXT:    [[UNPROMOTION:%.*]] = fptrunc float [[ADD_R]] to half
+// X86-NEXT:    [[UNPROMOTION4:%.*]] = fptrunc float [[ADD_I]] to half
+// X86-NEXT:    [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// X86-NEXT:    [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// X86-NEXT:    store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2
+// X86-NEXT:    store half [[UNPROMOTION4]], ptr [[RETVAL_IMAGP]], align 2
+// X86-NEXT:    [[TMP0:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// X86-NEXT:    ret <2 x half> [[TMP0]]
+//
 _Float16 _Complex add_half_cc(_Float16 _Complex a, _Float16 _Complex b) {
-  // X86-LABEL: @add_half_cc(
-  // X86: fadd
-  // X86: fadd
-  // X86-NOT: fadd
-  // X86: ret
   return a + b;
 }
 
+// AVX-LABEL: @sub_half_rr(
+// AVX-NEXT:  entry:
+// AVX-NEXT:    [[RETVAL:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT:    [[A_ADDR:%.*]] = alloca half, align 2
+// AVX-NEXT:    [[B_ADDR:%.*]] = alloca half, align 2
+// AVX-NEXT:    store half [[A:%.*]], ptr [[A_ADDR]], align 2
+// AVX-NEXT:    store half [[B:%.*]], ptr [[B_ADDR]], align 2
+// AVX-NEXT:    [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// AVX-NEXT:    [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2
+// AVX-NEXT:    [[SUB:%.*]] = fsub half [[TMP0]], [[TMP1]]
+// AVX-NEXT:    [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// AVX-NEXT:    [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// AVX-NEXT:    store half [[SUB]], ptr [[RETVAL_REALP]], align 2
+// AVX-NEXT:    store half 0xH0000, ptr [[RETVAL_IMAGP]], align 2
+// AVX-NEXT:    [[TMP2:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// AVX-NEXT:    ret <2 x half> [[TMP2]]
+//
+// X86-LABEL: @sub_half_rr(
+// X86-NEXT:  entry:
+// X86-NEXT:    [[RETVAL:%.*]] = alloca { half, half }, align 2
+// X86-NEXT:    [[A_ADDR:%.*]] = alloca half, align 2
+// X86-NEXT:    [[B_ADDR:%.*]] = alloca half, align 2
+// X86-NEXT:    store half [[A:%.*]], ptr [[A_ADDR]], align 2
+// X86-NEXT:    store half [[B:%.*]], ptr [[B_ADDR]], align 2
+// X86-NEXT:    [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// X86-NEXT:    [[EXT:%.*]] = fpext half [[TMP0]] to float
+// X86-NEXT:    [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2
+// X86-NEXT:    [[EXT1:%.*]] = fpext half [[TMP1]] to float
+// X86-NEXT:    [[SUB:%.*]] = fsub float [[EXT]], [[EXT1]]
+// X86-NEXT:    [[UNPROMOTION:%.*]] = fptrunc float [[SUB]] to half
+// X86-NEXT:    [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// X86-NEXT:    [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// X86-NEXT:    store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2
+// X86-NEXT:    store half 0xH0000, ptr [[RETVAL_IMAGP]], align 2
+// X86-NEXT:    [[TMP2:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// X86-NEXT:    ret <2 x half> [[TMP2]]
+//
 _Float16 _Complex sub_half_rr(_Float16 a, _Float16 b) {
-  // X86-LABEL: @sub_half_rr(
-  // X86: fsub
-  // X86-NOT: fsub
-  // X86: ret
   return a - b;
 }
+
+// AVX-LABEL: @sub_half_cr(
+// AVX-NEXT:  entry:
+// AVX-NEXT:    [[RETVAL:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT:    [[A:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT:    [[B_ADDR:%.*]] = alloca half, align 2
+// AVX-NEXT:    store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2
+// AVX-NEXT:    store half [[B:%.*]], ptr [[B_ADDR]], align 2
+// AVX-NEXT:    [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0
+// AVX-NEXT:    [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2
+// AVX-NEXT:    [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1
+// AVX-NEXT:    [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2
+// AVX-NEXT:    [[TMP0:%.*]] = load half, ptr [[B_ADDR]], align 2
+// AVX-NEXT:    [[SUB_R:%.*]] = fsub half [[A_REAL]], [[TMP0]]
+// AVX-NEXT:    [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// AVX-NEXT:    [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// AVX-NEXT:    store half [[SUB_R]], ptr [[RETVAL_REALP]], align 2
+// AVX-NEXT:    store half [[A_IMAG]], ptr [[RETVAL_IMAGP]], align 2
+// AVX-NEXT:    [[TMP1:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// AVX-NEXT:    ret <2 x half> [[TMP1]]
+//
+// X86-LABEL: @sub_half_cr(
+// X86-NEXT:  entry:
+// X86-NEXT:    [[RETVAL:%.*]] = alloca { half, half }, align 2
+// X86-NEXT:    [[A:%.*]] = alloca { half, half }, align 2
+// X86-NEXT:    [[B_ADDR:%.*]] = alloca half, align 2
+// X86-NEXT:    store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2
+// X86-NEXT:    store half [[B:%.*]], ptr [[B_ADDR]], align 2
+// X86-NEXT:    [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0
+// X86-NEXT:    [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2
+// X86-NEXT:    [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1
+// X86-NEXT:    [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2
+// X86-NEXT:    [[EXT:%.*]] = fpext half [[A_REAL]] to float
+// X86-NEXT:    [[EXT1:%.*]] = fpext half [[A_IMAG]] to float
+// X86-NEXT:    [[TMP0:%.*]] = load half, ptr [[B_ADDR]], align 2
+// X86-NEXT:    [[EXT2:%.*]] = fpext half [[TMP0]] to float
+// X86-NEXT:    [[SUB_R:%.*]] = fsub float [[EXT]], [[EXT2]]
+// X86-NEXT:    [[UNPROMOTION:%.*]] = fptrunc float [[SUB_R]] to half
+// X86-NEXT:    [[UNPROMOTION3:%.*]] = fptrunc float [[EXT1]] to half
+// X86-NEXT:    [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// X86-NEXT:    [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// X86-NEXT:    store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2
+// X86-NEXT:    store half [[UNPROMOTION3]], ptr [[RETVAL_IMAGP]], align 2
+// X86-NEXT:    [[TMP1:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// X86-NEXT:    ret <2 x half> [[TMP1]]
+//
 _Float16 _Complex sub_half_cr(_Float16 _Complex a, _Float16 b) {
-  // X86-LABEL: @sub_half_cr(
-  // X86: fsub
-  // X86-NOT: fsub
-  // X86: ret
   return a - b;
 }
+
+// AVX-LABEL: @sub_half_rc(
+// AVX-NEXT:  entry:
+// AVX-NEXT:    [[RETVAL:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT:    [[B:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT:    [[A_ADDR:%.*]] = alloca half, align 2
+// AVX-NEXT:    store <2 x half> [[B_COERCE:%.*]], ptr [[B]], align 2
+// AVX-NEXT:    store half [[A:%.*]], ptr [[A_ADDR]], align 2
+// AVX-NEXT:    [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// AVX-NEXT:    [[B_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 0
+// AVX-NEXT:    [[B_REAL:%.*]] = load half, ptr [[B_REALP]], align 2
+// AVX-NEXT:    [[B_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 1
+// AVX-NEXT:    [[B_IMAG:%.*]] = load half, ptr [[B_IMAGP]], align 2
+// AVX-NEXT:    [[SUB_R:%.*]] = fsub half [[TMP0]], [[B_REAL]]
+// AVX-NEXT:    [[SUB_I:%.*]] = fneg half [[B_IMAG]]
+// AVX-NEXT:    [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// AVX-NEXT:    [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// AVX-NEXT:    store half [[SUB_R]], ptr [[RETVAL_REALP]], align 2
+// AVX-NEXT:    store half [[SUB_I]], ptr [[RETVAL_IMAGP]], align 2
+// AVX-NEXT:    [[TMP1:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// AVX-NEXT:    ret <2 x half> [[TMP1]]
+//
+// X86-LABEL: @sub_half_rc(
+// X86-NEXT:  entry:
+// X86-NEXT:    [[RETVAL:%.*]] = alloca { half, half }, align 2
+// X86-NEXT:    [[B:%.*]] = alloca { half, half }, align 2
+// X86-NEXT:    [[A_ADDR:%.*]] = alloca half, align 2
+// X86-NEXT:    store <2 x half> [[B_COERCE:%.*]], ptr [[B]], align 2
+// X86-NEXT:    store half [[A:%.*]], ptr [[A_ADDR]], align 2
+// X86-NEXT:    [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// X86-NEXT:    [[EXT:%.*]] = fpext half [[TMP0]] to float
+// X86-NEXT:    [[B_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 0
+// X86-NEXT:    [[B_REAL:%.*]] = load half, ptr [[B_REALP]], align 2
+// X86-NEXT:    [[B_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 1
+// X86-NEXT:    [[B_IMAG:%.*]] = load half, ptr [[B_IMAGP]], align 2
+// X86-NEXT:    [[EXT1:%.*]] = fpext half [[B_REAL]] to float
+// X86-NEXT:    [[EXT2:%.*]] = fpext half [[B_IMAG]] to float
+// X86-NEXT:    [[SUB_R:%.*]] = fsub float [[EXT]], [[EXT1]]
+// X86-NEXT:    [[SUB_I:%.*]] = fneg float [[EXT2]]
+// X86-NEXT:    [[UNPROMOTION:%.*]] = fptrunc float [[SUB_R]] to half
+// X86-NEXT:    [[UNPROMOTION3:%.*]] = fptrunc float [[SUB_I]] to half
+// X86-NEXT:    [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// X86-NEXT:    [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// X86-NEXT:    store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2
+// X86-NEXT:    store half [[UNPROMOTION3]], ptr [[RETVAL_IMAGP]], align 2
+// X86-NEXT:    [[TMP1:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// X86-NEXT:    ret <2 x half> [[TMP1]]
+//
 _Float16 _Complex sub_half_rc(_Float16 a, _Float16 _Complex b) {
-  // X86-LABEL: @sub_half_rc(
-  // X86: fsub
-  // X86: fneg
-  // X86-NOT: fsub
-  // X86: ret
   return a - b;
 }
+
+// AVX-LABEL: @sub_half_cc(
+// AVX-NEXT:  entry:
+// AVX-NEXT:    [[RETVAL:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT:    [[A:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT:    [[B:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT:    store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2
+// AVX-NEXT:    store <2 x half> [[B_COERCE:%.*]], ptr [[B]], align 2
+// AVX-NEXT:    [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0
+// AVX-NEXT:    [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2
+// AVX-NEXT:    [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1
+// AVX-NEXT:    [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2
+// AVX-NEXT:    [[B_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 0
+// AVX-NEXT:    [[B_REAL:%.*]] = load half, ptr [[B_REALP]], align 2
+// AVX-NEXT:    [[B_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 1
+// AVX-NEXT:    [[B_IMAG:%.*]] = load half, ptr [[B_IMAGP]], align 2
+// AVX-NEXT:    [[SUB_R:%.*]] = fsub half [[A_REAL]], [[B_REAL]]
+// AVX-NEXT:    [[SUB_I:%.*]] = fsub half [[A_IMAG]], [[B_IMAG]]
+// AVX-NEXT:    [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// AVX-NEXT:    [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// AVX-NEXT:    store half [[SUB_R]], ptr [[RETVAL_REALP]], align 2
+// AVX-NEXT:    store half [[SUB_I]], ptr [[RETVAL_IMAGP]], align 2
+// AVX-NEXT:    [[TMP0:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// AVX-NEXT:    ret <2 x half> [[TMP0]]
+//
+// X86-LABEL: @sub_half_cc(
+// X86-NEXT:  entry:
+// X86-NEXT:    [[RETVAL:%.*]] = alloca { half, half }, align 2
+// X86-NEXT:    [[A:%.*]] = alloca { half, half }, align 2
+// X86-NEXT:    [[B:%.*]] = alloca { half, half }, align 2
+// X86-NEXT:    store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2
+// X86-NEXT:    store <2 x half> [[B_COERCE:%.*]], ptr [[B]], align 2
+// X86-NEXT:    [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0
+// X86-NEXT:    [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2
+// X86-NEXT:    [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1
+// X86-NEXT:    [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2
+// X86-NEXT:    [[EXT:%.*]] = fpext half [[A_REAL]] to float
+// X86-NEXT:    [[EXT1:%.*]] = fpext half [[A_IMAG]] to float
+// X86-NEXT:    [[B_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 0
+// X86-NEXT:    [[B_REAL:%.*]] = load half, ptr [[B_REALP]], align 2
+// X86-NEXT:    [[B_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 1
+// X86-NEXT:    [[B_IMAG:%.*]] = load half, ptr [[B_IMAGP]], align 2
+// X86-NEXT:    [[EXT2:%.*]] = fpext half [[B_REAL]] to float
+// X86-NEXT:    [[EXT3:%.*]] = fpext half [[B_IMAG]] to float
+// X86-NEXT:    [[SUB_R:%.*]] = fsub float [[EXT]], [[EXT2]]
+// X86-NEXT:    [[SUB_I:%.*]] = fsub float [[EXT1]], [[EXT3]]
+// X86-NEXT:    [[UNPROMOTION:%.*]] = fptrunc float [[SUB_R]] to half
+// X86-NEXT:    [[UNPROMOTION4:%.*]] = fptrunc float [[SUB_I]] to half
+// X86-NEXT:    [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// X86-NEXT:    [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// X86-NEXT:    store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2
+// X86-NEXT:    store half [[UNPROMOTION4]], ptr [[RETVAL_IMAGP]], align 2
+// X86-NEXT:    [[TMP0:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// X86-NEXT:    ret <2 x half> [[TMP0]]
+//
 _Float16 _Complex sub_half_cc(_Float16 _Complex a, _Float16 _Complex b) {
-  // X86-LABEL: @sub_half_cc(
-  // X86: fsub
-  // X86: fsub
-  // X86-NOT: fsub
-  // X86: ret
   return a - b;
 }
 
+// AVX-LABEL: @mul_half_rr(
+// AVX-NEXT:  entry:
+// AVX-NEXT:    [[RETVAL:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT:    [[A_ADDR:%.*]] = alloca half, align 2
+// AVX-NEXT:    [[B_ADDR:%.*]] = alloca half, align 2
+// AVX-NEXT:    store half [[A:%.*]], ptr [[A_ADDR]], align 2
+// AVX-NEXT:    store half [[B:%.*]], ptr [[B_ADDR]], align 2
+// AVX-NEXT:    [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// AVX-NEXT:    [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2
+// AVX-NEXT:    [[MUL:%.*]] = fmul half [[TMP0]], [[TMP1]]
+// AVX-NEXT:    [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// AVX-NEXT:    [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// AVX-NEXT:    store half [[MUL]], ptr [[RETVAL_REALP]], align 2
+// AVX-NEXT:    store half 0xH0000, ptr [[RETVAL_IMAGP]], align 2
+// AVX-NEXT:    [[TMP2:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// AVX-NEXT:    ret <2 x half> [[TMP2]]
+//
+// X86-LABEL: @mul_half_rr(
+// X86-NEXT:  entry:
+// X86-NEXT:    [[RETVAL:%.*]] = alloca { half, half }, align 2
+// X86-NEXT:    [[A_ADDR:%.*]] = alloca half, align 2
+// X86-NEXT:    [[B_ADDR:%.*]] = alloca half, align 2
+// X86-NEXT:    store half [[A:%.*]], ptr [[A_ADDR]], align 2
+// X86-NEXT:    store half [[B:%.*]], ptr [[B_ADDR]], align 2
+// X86-NEXT:    [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// X86-NEXT:    [[EXT:%.*]] = fpext half [[TMP0]] to float
+// X86-NEXT:    [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2
+// X86-NEXT:    [[EXT1:%.*]] = fpext half [[TMP1]] to float
+// X86-NEXT:    [[MUL:%.*]] = fmul float [[EXT]], [[EXT1]]
+// X86-NEXT:    [[UNPROMOTION:%.*]] = fptrunc float [[MUL]] to half
+// X86-NEXT:    [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// X86-NEXT:    [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// X86-NEXT:    store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2
+// X86-NEXT:    store half 0xH0000, ptr [[RETVAL_IMAGP]], align 2
+// X86-NEXT:    [[TMP2:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// X86-NEXT:    ret <2 x half> [[TMP2]]
+//
 _Float16 _Complex mul_half_rr(_Float16 a, _Float16 b) {
-  // X86-LABEL: @mul_half_rr(
-  // X86: fmul
-  // X86-NOT: fmul
-  // X86: ret
   return a * b;
 }
+
+// AVX-LABEL: @mul_half_cr(
+// AVX-NEXT:  entry:
+// AVX-NEXT:    [[RETVAL:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT:    [[A:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT:    [[B_ADDR:%.*]] = alloca half, align 2
+// AVX-NEXT:    store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2
+// AVX-NEXT:    store half [[B:%.*]], ptr [[B_ADDR]], align 2
+// AVX-NEXT:    [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0
+// AVX-NEXT:    [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2
+// AVX-NEXT:    [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1
+// AVX-NEXT:    [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2
+// AVX-NEXT:    [[TMP0:%.*]] = load half, ptr [[B_ADDR]], align 2
+// AVX-NEXT:    [[MUL_RL:%.*]] = fmul half [[A_REAL]], [[TMP0]]
+// AVX-NEXT:    [[MUL_IL:%.*]] = fmul half [[A_IMAG]], [[TMP0]]
+// AVX-NEXT:    [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// AVX-NEXT:    [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// AVX-NEXT:    store half [[MUL_RL]], ptr [[RETVAL_REALP]], align 2
+// AVX-NEXT:    store half [[MUL_IL]], ptr [[RETVAL_IMAGP]], align 2
+// AVX-NEXT:    [[TMP1:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// AVX-NEXT:    ret <2 x half> [[TMP1]]
+//
+// X86-LABEL: @mul_half_cr(
+// X86-NEXT:  entry:
+// X86-NEXT:    [[RETVAL:%.*]] = alloca { half, half }, align 2
+// X86-NEXT:    [[A:%.*]] = alloca { half, half }, align 2
+// X86-NEXT:    [[B_ADDR:%.*]] = alloca half, align 2
+// X86-NEXT:    store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2
+// X86-NEXT:    store half [[B:%.*]], ptr [[B_ADDR]], align 2
+// X86-NEXT:    [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0
+// X86-NEXT:    [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2
+// X86-NEXT:    [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1
+// X86-NEXT:    [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2
+// X86-NEXT:    [[EXT:%.*]] = fpext half [[A_REAL]] to float
+// X86-NEXT:    [[EXT1:%.*]] = fpext half [[A_IMAG]] to float
+// X86-NEXT:    [[TMP0:%.*]] = load half, ptr [[B_ADDR]], align 2
+// X86-NEXT:    [[EXT2:%.*]] = fpext half [[TMP0]] to float
+// X86-NEXT:    [[MUL_RL:%.*]] = fmul float [[EXT]], [[EXT2]]
+// X86-NEXT:    [[MUL_IL:%.*]] = fmul float [[EXT1]], [[EXT2]]
+// X86-NEXT:    [[UNPROMOTION:%.*]] = fptrunc float [[MUL_RL]] to half
+// X86-NEXT:    [[UNPROMOTION3:%.*]] = fptrunc float [[MUL_IL]] to half
+// X86-NEXT:    [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// X86-NEXT:    [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// X86-NEXT:    store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2
+// X86-NEXT:    store half [[UNPROMOTION3]], ptr [[RETVAL_IMAGP]], align 2
+// X86-NEXT:    [[TMP1:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// X86-NEXT:    ret <2 x half> [[TMP1]]
+//
 _Float16 _Complex mul_half_cr(_Float16 _Complex a, _Float16 b) {
-  // X86-LABEL: @mul_half_cr(
-  // X86: fmul
-  // X86: fmul
-  // X86-NOT: fmul
-  // X86: ret
   return a * b;
 }
+
+// AVX-LABEL: @mul_half_rc(
+// AVX-NEXT:  entry:
+// AVX-NEXT:    [[RETVAL:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT:    [[B:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT:    [[A_ADDR:%.*]] = alloca half, align 2
+// AVX-NEXT:    store <2 x half> [[B_COERCE:%.*]], ptr [[B]], align 2
+// AVX-NEXT:    store half [[A:%.*]], ptr [[A_ADDR]], align 2
+// AVX-NEXT:    [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// AVX-NEXT:    [[B_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 0
+// AVX-NEXT:    [[B_REAL:%.*]] = load half, ptr [[B_REALP]], align 2
+// AVX-NEXT:    [[B_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 1
+// AVX-NEXT:    [[B_IMAG:%.*]] = load half, ptr [[B_IMAGP]], align 2
+// AVX-NEXT:    [[MUL_RL:%.*]] = fmul half [[TMP0]], [[B_REAL]]
+// AVX-NEXT:    [[MUL_IR:%.*]] = fmul half [[TMP0]], [[B_IMAG]]
+// AVX-NEXT:    [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// AVX-NEXT:    [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// AVX-NEXT:    store half [[MUL_RL]], ptr [[RETVAL_REALP]], align 2
+// AVX-NEXT:    store half [[MUL_IR]], ptr [[RETVAL_IMAGP]], align 2
+// AVX-NEXT:    [[TMP1:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// AVX-NEXT:    ret <2 x half> [[TMP1]]
+//
+// X86-LABEL: @mul_half_rc(
+// X86-NEXT:  entry:
+// X86-NEXT:    [[RETVAL:%.*]] = alloca { half, half }, align 2
+// X86-NEXT:    [[B:%.*]] = alloca { half, half }, align 2
+// X86-NEXT:    [[A_ADDR:%.*]] = alloca half, align 2
+// X86-NEXT:    store <2 x half> [[B_COERCE:%.*]], ptr [[B]], align 2
+// X86-NEXT:    store half [[A:%.*]], ptr [[A_ADDR]], align 2
+// X86-NEXT:    [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// X86-NEXT:    [[EXT:%.*]] = fpext half [[TMP0]] to float
+// X86-NEXT:    [[B_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 0
+// X86-NEXT:    [[B_REAL:%.*]] = load half, ptr [[B_REALP]], align 2
+// X86-NEXT:    [[B_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 1
+// X86-NEXT:    [[B_IMAG:%.*]] = load half, ptr [[B_IMAGP]], align 2
+// X86-NEXT:    [[EXT1:%.*]] = fpext half [[B_REAL]] to float
+// X86-NEXT:    [[EXT2:%.*]] = fpext half [[B_IMAG]] to float
+// X86-NEXT:    [[MUL_RL:%.*]] = fmul float [[EXT]], [[EXT1]]
+// X86-NEXT:    [[MUL_IR:%.*]] = fmul float [[EXT]], [[EXT2]]
+// X86-NEXT:    [[UNPROMOTION:%.*]] = fptrunc float [[MUL_RL]] to half
+// X86-NEXT:    [[UNPROMOTION3:%.*]] = fptrunc float [[MUL_IR]] to half
+// X86-NEXT:    [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// X86-NEXT:    [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// X86-NEXT:    store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2
+// X86-NEXT:    store half [[UNPROMOTION3]], ptr [[RETVAL_IMAGP]], align 2
+// X86-NEXT:    [[TMP1:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// X86-NEXT:    ret <2 x half> [[TMP1]]
+//
 _Float16 _Complex mul_half_rc(_Float16 a, _Float16 _Complex b) {
-  // X86-LABEL: @mul_half_rc(
-  // X86: fmul
-  // X86: fmul
-  // X86-NOT: fmul
-  // X86: ret
   return a * b;
 }
+
+// AVX-LABEL: @mul_half_cc(
+// AVX-NEXT:  entry:
+// AVX-NEXT:    [[RETVAL:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT:    [[A:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT:    [[B:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT:    [[COERCE:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT:    store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2
+// AVX-NEXT:    store <2 x half> [[B_COERCE:%.*]], ptr [[B]], align 2
+// AVX-NEXT:    [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0
+// AVX-NEXT:    [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2
+// AVX-NEXT:    [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1
+// AVX-NEXT:    [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2
+// AVX-NEXT:    [[B_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 0
+// AVX-NEXT:    [[B_REAL:%.*]] = load half, ptr [[B_REALP]], align 2
+// AVX-NEXT:    [[B_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 1
+// AVX-NEXT:    [[B_IMAG:%.*]] = load half, ptr [[B_IMAGP]], align 2
+// AVX-NEXT:    [[MUL_AC:%.*]] = fmul half [[A_REAL]], [[B_REAL]]
+// AVX-NEXT:    [[MUL_BD:%.*]] = fmul half [[A_IMAG]], [[B_IMAG]]
+// AVX-NEXT:    [[MUL_AD:%.*]] = fmul half [[A_REAL]], [[B_IMAG]]
+// AVX-NEXT:    [[MUL_BC:%.*]] = fmul half [[A_IMAG]], [[B_REAL]]
+// AVX-NEXT:    [[MUL_R:%.*]] = fsub half [[MUL_AC]], [[MUL_BD]]
+// AVX-NEXT:    [[MUL_I:%.*]] = fadd half [[MUL_AD]], [[MUL_BC]]
+// AVX-NEXT:    [[ISNAN_CMP:%.*]] = fcmp uno half [[MUL_R]], [[MUL_R]]
+// AVX-NEXT:    br i1 [[ISNAN_CMP]], label [[COMPLEX_MUL_IMAG_NAN:%.*]], label [[COMPLEX_MUL_CONT:%.*]], !prof [[PROF2:![0-9]+]]
+// AVX:       complex_mul_imag_nan:
+// AVX-NEXT:    [[ISNAN_CMP1:%.*]] = fcmp uno half [[MUL_I]], [[MUL_I]]
+// AVX-NEXT:    br i1 [[ISNAN_CMP1]], label [[COMPLEX_MUL_LIBCALL:%.*]], label [[COMPLEX_MUL_CONT]], !prof [[PROF2]]
+// AVX:       complex_mul_libcall:
+// AVX-NEXT:    [[CALL:%.*]] = call <2 x half> @__mulhc3(half noundef [[A_REAL]], half noundef [[A_IMAG]], half noundef [[B_REAL]], half noundef [[B_IMAG]]) #[[ATTR1:[0-9]+]]
+// AVX-NEXT:    store <2 x half> [[CALL]], ptr [[COERCE]], align 2
+// AVX-NEXT:    [[COERCE_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[COERCE]], i32 0, i32 0
+// AVX-NEXT:    [[COERCE_REAL:%.*]] = load half, ptr [[COERCE_REALP]], align 2
+// AVX-NEXT:    [[COERCE_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[COERCE]], i32 0, i32 1
+// AVX-NEXT:    [[COERCE_IMAG:%.*]] = load half, ptr [[COERCE_IMAGP]], align 2
+// AVX-NEXT:    br label [[COMPLEX_MUL_CONT]]
+// AVX:       complex_mul_cont:
+// AVX-NEXT:    [[REAL_MUL_PHI:%.*]] = phi half [ [[MUL_R]], [[ENTRY:%.*]] ], [ [[MUL_R]], [[COMPLEX_MUL_IMAG_NAN]] ], [ [[COERCE_REAL]], [[COMPLEX_MUL_LIBCALL]] ]
+// AVX-NEXT:    [[IMAG_MUL_PHI:%.*]] = phi half [ [[MUL_I]], [[ENTRY]] ], [ [[MUL_I]], [[COMPLEX_MUL_IMAG_NAN]] ], [ [[COERCE_IMAG]], [[COMPLEX_MUL_LIBCALL]] ]
+// AVX-NEXT:    [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// AVX-NEXT:    [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// AVX-NEXT:    store half [[REAL_MUL_PHI]], ptr [[RETVAL_REALP]], align 2
+// AVX-NEXT:    store half [[IMAG_MUL_PHI]], ptr [[RETVAL_IMAGP]], align 2
+// AVX-NEXT:    [[TMP0:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// AVX-NEXT:    ret <2 x half> [[TMP0]]
+//
+// X86-LABEL: @mul_half_cc(
+// X86-NEXT:  entry:
+// X86-NEXT:    [[RETVAL:%.*]] = alloca { half, half }, align 2
+// X86-NEXT:    [[A:%.*]] = alloca { half, half }, align 2
+// X86-NEXT:    [[B:%.*]] = alloca { half, half }, align 2
+// X86-NEXT:    [[COERCE:%.*]] = alloca { float, float }, align 4
+// X86-NEXT:    store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2
+// X86-NEXT:    store <2 x half> [[B_COERCE:%.*]], ptr [[B]], align 2
+// X86-NEXT:    [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0
+// X86-NEXT:    [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2
+// X86-NEXT:    [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1
+// X86-NEXT:    [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2
+// X86-NEXT:    [[EXT:%.*]] = fpext half [[A_REAL]] to float
+// X86-NEXT:    [[EXT1:%.*]] = fpext half [[A_IMAG]] to float
+// X86-NEXT:    [[B_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 0
+// X86-NEXT:    [[B_REAL:%.*]] = load half, ptr [[B_REALP]], align 2
+// X86-NEXT:    [[B_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 1
+// X86-NEXT:    [[B_IMAG:%.*]] = load half, ptr [[B_IMAGP]], align 2
+// X86-NEXT:    [[EXT2:%.*]] = fpext half [[B_REAL]] to float
+// X86-NEXT:    [[EXT3:%.*]] = fpext half [[B_IMAG]] to float
+// X86-NEXT:    [[MUL_AC:%.*]] = fmul float [[EXT]], [[EXT2]]
+// X86-NEXT:    [[MUL_BD:%.*]] = fmul float [[EXT1]], [[EXT3]]
+// X86-NEXT:    [[MUL_AD:%.*]] = fmul float [[EXT]], [[EXT3]]
+// X86-NEXT:    [[MUL_BC:%.*]] = fmul float [[EXT1]], [[EXT2]]
+// X86-NEXT:    [[MUL_R:%.*]] = fsub float [[MUL_AC]], [[MUL_BD]]
+// X86-NEXT:    [[MUL_I:%.*]] = fadd float [[MUL_AD]], [[MUL_BC]]
+// X86-NEXT:    [[ISNAN_CMP:%.*]] = fcmp uno float [[MUL_R]], [[MUL_R]]
+// X86-NEXT:    br i1 [[ISNAN_CMP]], label [[COMPLEX_MUL_IMAG_NAN:%.*]], label [[COMPLEX_MUL_CONT:%.*]], !prof [[PROF2:![0-9]+]]
+// X86:       complex_mul_imag_nan:
+// X86-NEXT:    [[ISNAN_CMP4:%.*]] = fcmp uno float [[MUL_I]], [[MUL_I]]
+// X86-NEXT:    br i1 [[ISNAN_CMP4]], label [[COMPLEX_MUL_LIBCALL:%.*]], label [[COMPLEX_MUL_CONT]], !prof [[PROF2]]
+// X86:       complex_mul_libcall:
+// X86-NEXT:    [[CALL:%.*]] = call <2 x float> @__mulsc3(float noundef [[EXT]], float noundef [[EXT1]], float noundef [[EXT2]], float noundef [[EXT3]]) #[[ATTR2:[0-9]+]]
+// X86-NEXT:    store <2 x float> [[CALL]], ptr [[COERCE]], align 4
+// X86-NEXT:    [[COERCE_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[COERCE]], i32 0, i32 0
+// X86-NEXT:    [[COERCE_REAL:%.*]] = load float, ptr [[COERCE_REALP]], align 4
+// X86-NEXT:    [[COERCE_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[COERCE]], i32 0, i32 1
+// X86-NEXT:    [[COERCE_IMAG:%.*]] = load float, ptr [[COERCE_IMAGP]], align 4
+// X86-NEXT:    br label [[COMPLEX_MUL_CONT]]
+// X86:       complex_mul_cont:
+// X86-NEXT:    [[REAL_MUL_PHI:%.*]] = phi float [ [[MUL_R]], [[ENTRY:%.*]] ], [ [[MUL_R]], [[COMPLEX_MUL_IMAG_NAN]] ], [ [[COERCE_REAL]], [[COMPLEX_MUL_LIBCALL]] ]
+// X86-NEXT:    [[IMAG_MUL_PHI:%.*]] = phi float [ [[MUL_I]], [[ENTRY]] ], [ [[MUL_I]], [[COMPLEX_MUL_IMAG_NAN]] ], [ [[COERCE_IMAG]], [[COMPLEX_MUL_LIBCALL]] ]
+// X86-NEXT:    [[UNPROMOTION:%.*]] = fptrunc float [[REAL_MUL_PHI]] to half
+// X86-NEXT:    [[UNPROMOTION5:%.*]] = fptrunc float [[IMAG_MUL_PHI]] to half
+// X86-NEXT:    [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// X86-NEXT:    [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// X86-NEXT:    store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2
+// X86-NEXT:    store half [[UNPROMOTION5]], ptr [[RETVAL_IMAGP]], align 2
+// X86-NEXT:    [[TMP0:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// X86-NEXT:    ret <2 x half> [[TMP0]]
+//
 _Float16 _Complex mul_half_cc(_Float16 _Complex a, _Float16 _Complex b) {
-  // X86-LABEL: @mul_half_cc(
-  // X86: %[[AC:[^ ]+]] = fmul
-  // X86: %[[BD:[^ ]+]] = fmul
-  // X86: %[[AD:[^ ]+]] = fmul
-  // X86: %[[BC:[^ ]+]] = fmul
-  // X86: %[[RR:[^ ]+]] = fsub half %[[AC]], %[[BD]]
-  // X86: %[[RI:[^ ]+]] = fadd half
-  // X86-DAG: %[[AD]]
-  // X86-DAG: ,
-  // X86-DAG: %[[BC]]
-  // X86: fcmp uno half %[[RR]]
-  // X86: fcmp uno half %[[RI]]
-  // X86: call {{.*}} @__mulhc3(
-  // X86: ret
   return a * b;
 }
-
+// AVX-LABEL: @div_half_rr(
+// AVX-NEXT:  entry:
+// AVX-NEXT:    [[RETVAL:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT:    [[A_ADDR:%.*]] = alloca half, align 2
+// AVX-NEXT:    [[B_ADDR:%.*]] = alloca half, align 2
+// AVX-NEXT:    store half [[A:%.*]], ptr [[A_ADDR]], align 2
+// AVX-NEXT:    store half [[B:%.*]], ptr [[B_ADDR]], align 2
+// AVX-NEXT:    [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// AVX-NEXT:    [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2
+// AVX-NEXT:    [[DIV:%.*]] = fdiv half [[TMP0]], [[TMP1]]
+// AVX-NEXT:    [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// AVX-NEXT:    [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// AVX-NEXT:    store half [[DIV]], ptr [[RETVAL_REALP]], align 2
+// AVX-NEXT:    store half 0xH0000, ptr [[RETVAL_IMAGP]], align 2
+// AVX-NEXT:    [[TMP2:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// AVX-NEXT:    ret <2 x half> [[TMP2]]
+//
+// X86-LABEL: @div_half_rr(
+// X86-NEXT:  entry:
+// X86-NEXT:    [[RETVAL:%.*]] = alloca { half, half }, align 2
+// X86-NEXT:    [[A_ADDR:%.*]] = alloca half, align 2
+// X86-NEXT:    [[B_ADDR:%.*]] = alloca half, align 2
+// X86-NEXT:    store half [[A:%.*]], ptr [[A_ADDR]], align 2
+// X86-NEXT:    store half [[B:%.*]], ptr [[B_ADDR]], align 2
+// X86-NEXT:    [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// X86-NEXT:    [[EXT:%.*]] = fpext half [[TMP0]] to float
+// X86-NEXT:    [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2
+// X86-NEXT:    [[EXT1:%.*]] = fpext half [[TMP1]] to float
+// X86-NEXT:    [[DIV:%.*]] = fdiv float [[EXT]], [[EXT1]]
+// X86-NEXT:    [[UNPROMOTION:%.*]] = fptrunc float [[DIV]] to half
+// X86-NEXT:    [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// X86-NEXT:    [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// X86-NEXT:    store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2
+// X86-NEXT:    store half 0xH0000, ptr [[RETVAL_IMAGP]], align 2
+// X86-NEXT:    [[TMP2:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// X86-NEXT:    ret <2 x half> [[TMP2]]
+//
 _Float16 _Complex div_half_rr(_Float16 a, _Float16 b) {
-  // X86-LABEL: @div_half_rr(
-  // X86: fdiv
-  // X86-NOT: fdiv
-  // X86: ret
   return a / b;
 }
+
+// AVX-LABEL: @div_half_cr(
+// AVX-NEXT:  entry:
+// AVX-NEXT:    [[RETVAL:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT:    [[A:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT:    [[B_ADDR:%.*]] = alloca half, align 2
+// AVX-NEXT:    store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2
+// AVX-NEXT:    store half [[B:%.*]], ptr [[B_ADDR]], align 2
+// AVX-NEXT:    [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0
+// AVX-NEXT:    [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2
+// AVX-NEXT:    [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1
+// AVX-NEXT:    [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2
+// AVX-NEXT:    [[TMP0:%.*]] = load half, ptr [[B_ADDR]], align 2
+// AVX-NEXT:    [[TMP1:%.*]] = fdiv half [[A_REAL]], [[TMP0]]
+// AVX-NEXT:    [[TMP2:%.*]] = fdiv half [[A_IMAG]], [[TMP0]]
+// AVX-NEXT:    [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// AVX-NEXT:    [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// AVX-NEXT:    store half [[TMP1]], ptr [[RETVAL_REALP]], align 2
+// AVX-NEXT:    store half [[TMP2]], ptr [[RETVAL_IMAGP]], align 2
+// AVX-NEXT:    [[TMP3:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// AVX-NEXT:    ret <2 x half> [[TMP3]]
+//
+// X86-LABEL: @div_half_cr(
+// X86-NEXT:  entry:
+// X86-NEXT:    [[RETVAL:%.*]] = alloca { half, half }, align 2
+// X86-NEXT:    [[A:%.*]] = alloca { half, half }, align 2
+// X86-NEXT:    [[B_ADDR:%.*]] = alloca half, align 2
+// X86-NEXT:    store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2
+// X86-NEXT:    store half [[B:%.*]], ptr [[B_ADDR]], align 2
+// X86-NEXT:    [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0
+// X86-NEXT:    [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2
+// X86-NEXT:    [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1
+// X86-NEXT:    [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2
+// X86-NEXT:    [[EXT:%.*]] = fpext half [[A_REAL]] to float
+// X86-NEXT:    [[EXT1:%.*]] = fpext half [[A_IMAG]] to float
+// X86-NEXT:    [[TMP0:%.*]] = load half, ptr [[B_ADDR]], align 2
+// X86-NEXT:    [[EXT2:%.*]] = fpext half [[TMP0]] to float
+// X86-NEXT:    [[TMP1:%.*]] = fdiv float [[EXT]], [[EXT2]]
+// X86-NEXT:    [[TMP2:%.*]] = fdiv float [[EXT1]], [[EXT2]]
+// X86-NEXT:    [[UNPROMOTION:%.*]] = fptrunc float [[TMP1]] to half
+// X86-NEXT:    [[UNPROMOTION3:%.*]] = fptrunc float [[TMP2]] to half
+// X86-NEXT:    [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// X86-NEXT:    [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// X86-NEXT:    store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2
+// X86-NEXT:    store half [[UNPROMOTION3]], ptr [[RETVAL_IMAGP]], align 2
+// X86-NEXT:    [[TMP3:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// X86-NEXT:    ret <2 x half> [[TMP3]]
+//
 _Float16 _Complex div_half_cr(_Float16 _Complex a, _Float16 b) {
-  // X86-LABEL: @div_half_cr(
-  // X86: fdiv
-  // X86: fdiv
-  // X86-NOT: fdiv
-  // X86: ret
   return a / b;
 }
+// AVX-LABEL: @div_half_rc(
+// AVX-NEXT:  entry:
+// AVX-NEXT:    [[RETVAL:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT:    [[B:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT:    [[A_ADDR:%.*]] = alloca half, align 2
+// AVX-NEXT:    [[COERCE:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT:    store <2 x half> [[B_COERCE:%.*]], ptr [[B]], align 2
+// AVX-NEXT:    store half [[A:%.*]], ptr [[A_ADDR]], align 2
+// AVX-NEXT:    [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// AVX-NEXT:    [[B_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 0
+// AVX-NEXT:    [[B_REAL:%.*]] = load half, ptr [[B_REALP]], align 2
+// AVX-NEXT:    [[B_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 1
+// AVX-NEXT:    [[B_IMAG:%.*]] = load half, ptr [[B_IMAGP]], align 2
+// AVX-NEXT:    [[CALL:%.*]] = call <2 x half> @__divhc3(half noundef [[TMP0]], half noundef 0xH0000, half noundef [[B_REAL]], half noundef [[B_IMAG]]) #[[ATTR1]]
+// AVX-NEXT:    store <2 x half> [[CALL]], ptr [[COERCE]], align 2
+// AVX-NEXT:    [[COERCE_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[COERCE]], i32 0, i32 0
+// AVX-NEXT:    [[COERCE_REAL:%.*]] = load half, ptr [[COERCE_REALP]], align 2
+// AVX-NEXT:    [[COERCE_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[COERCE]], i32 0, i32 1
+// AVX-NEXT:    [[COERCE_IMAG:%.*]] = load half, ptr [[COERCE_IMAGP]], align 2
+// AVX-NEXT:    [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// AVX-NEXT:    [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// AVX-NEXT:    store half [[COERCE_REAL]], ptr [[RETVAL_REALP]], align 2
+// AVX-NEXT:    store half [[COERCE_IMAG]], ptr [[RETVAL_IMAGP]], align 2
+// AVX-NEXT:    [[TMP1:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// AVX-NEXT:    ret <2 x half> [[TMP1]]
+//
+// X86-LABEL: @div_half_rc(
+// X86-NEXT:  entry:
+// X86-NEXT:    [[RETVAL:%.*]] = alloca { half, half }, align 2
+// X86-NEXT:    [[B:%.*]] = alloca { half, half }, align 2
+// X86-NEXT:    [[A_ADDR:%.*]] = alloca half, align 2
+// X86-NEXT:    [[COERCE:%.*]] = alloca { float, float }, align 4
+// X86-NEXT:    store <2 x half> [[B_COERCE:%.*]], ptr [[B]], align 2
+// X86-NEXT:    store half [[A:%.*]], ptr [[A_ADDR]], align 2
+// X86-NEXT:    [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// X86-NEXT:    [[EXT:%.*]] = fpext half [[TMP0]] to float
+// X86-NEXT:    [[B_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 0
+// X86-NEXT:    [[B_REAL:%.*]] = load half, ptr [[B_REALP]], align 2
+// X86-NEXT:    [[B_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 1
+// X86-NEXT:    [[B_IMAG:%.*]] = load half, ptr [[B_IMAGP]], align 2
+// X86-NEXT:    [[EXT1:%.*]] = fpext half [[B_REAL]] to float
+// X86-NEXT:    [[EXT2:%.*]] = fpext half [[B_IMAG]] to float
+// X86-NEXT:    [[CALL:%.*]] = call <2 x float> @__divsc3(float noundef [[EXT]], float noundef 0.000000e+00, float noundef [[EXT1]], float noundef [[EXT2]]) #[[ATTR2]]
+// X86-NEXT:    store <2 x float> [[CALL]], ptr [[COERCE]], align 4
+// X86-NEXT:    [[COERCE_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[COERCE]], i32 0, i32 0
+// X86-NEXT:    [[COERCE_REAL:%.*]] = load float, ptr [[COERCE_REALP]], align 4
+// X86-NEXT:    [[COERCE_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[COERCE]], i32 0, i32 1
+// X86-NEXT:    [[COERCE_IMAG:%.*]] = load float, ptr [[COERCE_IMAGP]], align 4
+// X86-NEXT:    [[UNPROMOTION:%.*]] = fptrunc float [[COERCE_REAL]] to half
+// X86-NEXT:    [[UNPROMOTION3:%.*]] = fptrunc float [[COERCE_IMAG]] to half
+// X86-NEXT:    [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// X86-NEXT:    [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// X86-NEXT:    store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2
+// X86-NEXT:    store half [[UNPROMOTION3]], ptr [[RETVAL_IMAGP]], align 2
+// X86-NEXT:    [[TMP1:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// X86-NEXT:    ret <2 x half> [[TMP1]]
+//
 _Float16 _Complex div_half_rc(_Float16 a, _Float16 _Complex b) {
-  // X86-LABEL: @div_half_rc(
-  // X86-NOT: fdiv
-  // X86: call {{.*}} @__divhc3(
-  // X86: ret
   return a / b;
 }
+
+// AVX-LABEL: @div_half_cc(
+// AVX-NEXT:  entry:
+// AVX-NEXT:    [[RETVAL:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT:    [[A:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT:    [[B:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT:    [[COERCE:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT:    store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2
+// AVX-NEXT:    store <2 x half> [[B_COERCE:%.*]], ptr [[B]], align 2
+// AVX-NEXT:    [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0
+// AVX-NEXT:    [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2
+// AVX-NEXT:    [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1
+// AVX-NEXT:    [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2
+// AVX-NEXT:    [[B_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 0
+// AVX-NEXT:    [[B_REAL:%.*]] = load half, ptr [[B_REALP]], align 2
+// AVX-NEXT:    [[B_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 1
+// AVX-NEXT:    [[B_IMAG:%.*]] = load half, ptr [[B_IMAGP]], align 2
+// AVX-NEXT:    [[CALL:%.*]] = call <2 x half> @__divhc3(half noundef [[A_REAL]], half noundef [[A_IMAG]], half noundef [[B_REAL]], half noundef [[B_IMAG]]) #[[ATTR1]]
+// AVX-NEXT:    store <2 x half> [[CALL]], ptr [[COERCE]], align 2
+// AVX-NEXT:    [[COERCE_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[COERCE]], i32 0, i32 0
+// AVX-NEXT:    [[COERCE_REAL:%.*]] = load half, ptr [[COERCE_REALP]], align 2
+// AVX-NEXT:    [[COERCE_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[COERCE]], i32 0, i32 1
+// AVX-NEXT:    [[COERCE_IMAG:%.*]] = load half, ptr [[COERCE_IMAGP]], align 2
+// AVX-NEXT:    [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// AVX-NEXT:    [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// AVX-NEXT:    store half [[COERCE_REAL]], ptr [[RETVAL_REALP]], align 2
+// AVX-NEXT:    store half [[COERCE_IMAG]], ptr [[RETVAL_IMAGP]], align 2
+// AVX-NEXT:    [[TMP0:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// AVX-NEXT:    ret <2 x half> [[TMP0]]
+//
+// X86-LABEL: @div_half_cc(
+// X86-NEXT:  entry:
+// X86-NEXT:    [[RETVAL:%.*]] = alloca { half, half }, align 2
+// X86-NEXT:    [[A:%.*]] = alloca { half, half }, align 2
+// X86-NEXT:    [[B:%.*]] = alloca { half, half }, align 2
+// X86-NEXT:    [[COERCE:%.*]] = alloca { float, float }, align 4
+// X86-NEXT:    store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2
+// X86-NEXT:    store <2 x half> [[B_COERCE:%.*]], ptr [[B]], align 2
+// X86-NEXT:    [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0
+// X86-NEXT:    [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2
+// X86-NEXT:    [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1
+// X86-NEXT:    [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2
+// X86-NEXT:    [[EXT:%.*]] = fpext half [[A_REAL]] to float
+// X86-NEXT:    [[EXT1:%.*]] = fpext half [[A_IMAG]] to float
+// X86-NEXT:    [[B_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 0
+// X86-NEXT:    [[B_REAL:%.*]] = load half, ptr [[B_REALP]], align 2
+// X86-NEXT:    [[B_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 1
+// X86-NEXT:    [[B_IMAG:%.*]] = load half, ptr [[B_IMAGP]], align 2
+// X86-NEXT:    [[EXT2:%.*]] = fpext half [[B_REAL]] to float
+// X86-NEXT:    [[EXT3:%.*]] = fpext half [[B_IMAG]] to float
+// X86-NEXT:    [[CALL:%.*]] = call <2 x float> @__divsc3(float noundef [[EXT]], float noundef [[EXT1]], float noundef [[EXT2]], float noundef [[EXT3]]) #[[ATTR2]]
+// X86-NEXT:    store <2 x float> [[CALL]], ptr [[COERCE]], align 4
+// X86-NEXT:    [[COERCE_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[COERCE]], i32 0, i32 0
+// X86-NEXT:    [[COERCE_REAL:%.*]] = load float, ptr [[COERCE_REALP]], align 4
+// X86-NEXT:    [[COERCE_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[COERCE]], i32 0, i32 1
+// X86-NEXT:    [[COERCE_IMAG:%.*]] = load float, ptr [[COERCE_IMAGP]], align 4
+// X86-NEXT:    [[UNPROMOTION:%.*]] = fptrunc float [[COERCE_REAL]] to half
+// X86-NEXT:    [[UNPROMOTION4:%.*]] = fptrunc float [[COERCE_IMAG]] to half
+// X86-NEXT:    [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// X86-NEXT:    [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// X86-NEXT:    store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2
+// X86-NEXT:    store half [[UNPROMOTION4]], ptr [[RETVAL_IMAGP]], align 2
+// X86-NEXT:    [[TMP0:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// X86-NEXT:    ret <2 x half> [[TMP0]]
+//
 _Float16 _Complex div_half_cc(_Float16 _Complex a, _Float16 _Complex b) {
-  // X86-LABEL: @div_half_cc(
-  // X86-NOT: fdiv
-  // X86: call {{.*}} @__divhc3(
-  // X86: ret
   return a / b;
 }
+
+// AVX-LABEL: @addcompound_half_rr(
+// AVX-NEXT:  entry:
+// AVX-NEXT:    [[RETVAL:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT:    [[A_ADDR:%.*]] = alloca half, align 2
+// AVX-NEXT:    [[C_ADDR:%.*]] = alloca half, align 2
+// AVX-NEXT:    store half [[A:%.*]], ptr [[A_ADDR]], align 2
+// AVX-NEXT:    store half [[C:%.*]], ptr [[C_ADDR]], align 2
+// AVX-NEXT:    [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// AVX-NEXT:    [[TMP1:%.*]] = load half, ptr [[C_ADDR]], align 2
+// AVX-NEXT:    [[ADD:%.*]] = fadd half [[TMP1]], [[TMP0]]
+// AVX-NEXT:    store half [[ADD]], ptr [[C_ADDR]], align 2
+// AVX-NEXT:    [[TMP2:%.*]] = load half, ptr [[C_ADDR]], align 2
+// AVX-NEXT:    [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// AVX-NEXT:    [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// AVX-NEXT:    store half [[TMP2]], ptr [[RETVAL_REALP]], align 2
+// AVX-NEXT:    store half 0xH0000, ptr [[RETVAL_IMAGP]], align 2
+// AVX-NEXT:    [[TMP3:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// AVX-NEXT:    ret <2 x half> [[TMP3]]
+//
+// X86-LABEL: @addcompound_half_rr(
+// X86-NEXT:  entry:
+// X86-NEXT:    [[RETVAL:%.*]] = alloca { half, half }, align 2
+// X86-NEXT:    [[A_ADDR:%.*]] = alloca half, align 2
+// X86-NEXT:    [[C_ADDR:%.*]] = alloca half, align 2
+// X86-NEXT:    store half [[A:%.*]], ptr [[A_ADDR]], align 2
+// X86-NEXT:    store half [[C:%.*]], ptr [[C_ADDR]], align 2
+// X86-NEXT:    [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// X86-NEXT:    [[EXT:%.*]] = fpext half [[TMP0]] to float
+// X86-NEXT:    [[TMP1:%.*]] = load half, ptr [[C_ADDR]], align 2
+// X86-NEXT:    [[CONV:%.*]] = fpext half [[TMP1]] to float
+// X86-NEXT:    [[ADD:%.*]] = fadd float [[CONV]], [[EXT]]
+// X86-NEXT:    [[CONV1:%.*]] = fptrunc float [[ADD]] to half
+// X86-NEXT:    store half [[CONV1]], ptr [[C_ADDR]], align 2
+// X86-NEXT:    [[TMP2:%.*]] = load half, ptr [[C_ADDR]], align 2
+// X86-NEXT:    [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// X86-NEXT:    [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// X86-NEXT:    store half [[TMP2]], ptr [[RETVAL_REALP]], align 2
+// X86-NEXT:    store half 0xH0000, ptr [[RETVAL_IMAGP]], align 2
+// X86-NEXT:    [[TMP3:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// X86-NEXT:    ret <2 x half> [[TMP3]]
+//
+_Float16 _Complex addcompound_half_rr(_Float16 a, _Float16 c) {
+  c += a;
+  return c;
+}
+
+// AVX-LABEL: @addcompound_half_cr(
+// AVX-NEXT:  entry:
+// AVX-NEXT:    [[RETVAL:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT:    [[A:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT:    [[C_ADDR:%.*]] = alloca half, align 2
+// AVX-NEXT:    store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2
+// AVX-NEXT:    store half [[C:%.*]], ptr [[C_ADDR]], align 2
+// AVX-NEXT:    [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0
+// AVX-NEXT:    [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2
+// AVX-NEXT:    [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1
+// AVX-NEXT:    [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2
+// AVX-NEXT:    [[TMP0:%.*]] = load half, ptr [[C_ADDR]], align 2
+// AVX-NEXT:    [[ADD_R:%.*]] = fadd half [[TMP0]], [[A_REAL]]
+// AVX-NEXT:    store half [[ADD_R]], ptr [[C_ADDR]], align 2
+// AVX-NEXT:    [[TMP1:%.*]] = load half, ptr [[C_ADDR]], align 2
+// AVX-NEXT:    [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// AVX-NEXT:    [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// AVX-NEXT:    store half [[TMP1]], ptr [[RETVAL_REALP]], align 2
+// AVX-NEXT:    store half 0xH0000, ptr [[RETVAL_IMAGP]], align 2
+// AVX-NEXT:    [[TMP2:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// AVX-NEXT:    ret <2 x half> [[TMP2]]
+//
+// X86-LABEL: @addcompound_half_cr(
+// X86-NEXT:  entry:
+// X86-NEXT:    [[RETVAL:%.*]] = alloca { half, half }, align 2
+// X86-NEXT:    [[A:%.*]] = alloca { half, half }, align 2
+// X86-NEXT:    [[C_ADDR:%.*]] = alloca half, align 2
+// X86-NEXT:    store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2
+// X86-NEXT:    store half [[C:%.*]], ptr [[C_ADDR]], align 2
+// X86-NEXT:    [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0
+// X86-NEXT:    [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2
+// X86-NEXT:    [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1
+// X86-NEXT:    [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2
+// X86-NEXT:    [[EXT:%.*]] = fpext half [[A_REAL]] to float
+// X86-NEXT:    [[EXT1:%.*]] = fpext half [[A_IMAG]] to float
+// X86-NEXT:    [[TMP0:%.*]] = load half, ptr [[C_ADDR]], align 2
+// X86-NEXT:    [[CONV:%.*]] = fpext half [[TMP0]] to float
+// X86-NEXT:    [[ADD_R:%.*]] = fadd float [[CONV]], [[EXT]]
+// X86-NEXT:    [[CONV2:%.*]] = fptrunc float [[ADD_R]] to half
+// X86-NEXT:    store half [[CONV2]], ptr [[C_ADDR]], align 2
+// X86-NEXT:    [[TMP1:%.*]] = load half, ptr [[C_ADDR]], align 2
+// X86-NEXT:    [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// X86-NEXT:    [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// X86-NEXT:    store half [[TMP1]], ptr [[RETVAL_REALP]], align 2
+// X86-NEXT:    store half 0xH0000, ptr [[RETVAL_IMAGP]], align 2
+// X86-NEXT:    [[TMP2:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// X86-NEXT:    ret <2 x half> [[TMP2]]
+//
+_Float16 _Complex addcompound_half_cr(_Float16 _Complex a, _Float16 c) {
+  c += a;
+  return c;
+}
+
Index: clang/test/CodeGen/X86/Float16-arithmetic.c
===================================================================
--- clang/test/CodeGen/X86/Float16-arithmetic.c
+++ clang/test/CodeGen/X86/Float16-arithmetic.c
@@ -1,7 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s | FileCheck %s
 
-
 // CHECK-LABEL: @add1(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca half, align 2
@@ -9,9 +8,12 @@
 // CHECK-NEXT:    store half [[A:%.*]], ptr [[A_ADDR]], align 2
 // CHECK-NEXT:    store half [[B:%.*]], ptr [[B_ADDR]], align 2
 // CHECK-NEXT:    [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// CHECK-NEXT:    [[EXT:%.*]] = fpext half [[TMP0]] to float
 // CHECK-NEXT:    [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2
-// CHECK-NEXT:    [[ADD:%.*]] = fadd half [[TMP0]], [[TMP1]]
-// CHECK-NEXT:    ret half [[ADD]]
+// CHECK-NEXT:    [[EXT1:%.*]] = fpext half [[TMP1]] to float
+// CHECK-NEXT:    [[ADD:%.*]] = fadd float [[EXT]], [[EXT1]]
+// CHECK-NEXT:    [[UNPROMOTION:%.*]] = fptrunc float [[ADD]] to half
+// CHECK-NEXT:    ret half [[UNPROMOTION]]
 //
 _Float16 add1(_Float16 a, _Float16 b) {
   return a + b;
@@ -26,11 +28,15 @@
 // CHECK-NEXT:    store half [[B:%.*]], ptr [[B_ADDR]], align 2
 // CHECK-NEXT:    store half [[C:%.*]], ptr [[C_ADDR]], align 2
 // CHECK-NEXT:    [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// CHECK-NEXT:    [[EXT:%.*]] = fpext half [[TMP0]] to float
 // CHECK-NEXT:    [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2
-// CHECK-NEXT:    [[ADD:%.*]] = fadd half [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    [[EXT1:%.*]] = fpext half [[TMP1]] to float
+// CHECK-NEXT:    [[ADD:%.*]] = fadd float [[EXT]], [[EXT1]]
 // CHECK-NEXT:    [[TMP2:%.*]] = load half, ptr [[C_ADDR]], align 2
-// CHECK-NEXT:    [[ADD1:%.*]] = fadd half [[ADD]], [[TMP2]]
-// CHECK-NEXT:    ret half [[ADD1]]
+// CHECK-NEXT:    [[EXT2:%.*]] = fpext half [[TMP2]] to float
+// CHECK-NEXT:    [[ADD3:%.*]] = fadd float [[ADD]], [[EXT2]]
+// CHECK-NEXT:    [[UNPROMOTION:%.*]] = fptrunc float [[ADD3]] to half
+// CHECK-NEXT:    ret half [[UNPROMOTION]]
 //
 _Float16 add2(_Float16 a, _Float16 b, _Float16 c) {
   return a + b + c;
@@ -43,9 +49,12 @@
 // CHECK-NEXT:    store half [[A:%.*]], ptr [[A_ADDR]], align 2
 // CHECK-NEXT:    store half [[B:%.*]], ptr [[B_ADDR]], align 2
 // CHECK-NEXT:    [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// CHECK-NEXT:    [[EXT:%.*]] = fpext half [[TMP0]] to float
 // CHECK-NEXT:    [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2
-// CHECK-NEXT:    [[DIV:%.*]] = fdiv half [[TMP0]], [[TMP1]]
-// CHECK-NEXT:    ret half [[DIV]]
+// CHECK-NEXT:    [[EXT1:%.*]] = fpext half [[TMP1]] to float
+// CHECK-NEXT:    [[DIV:%.*]] = fdiv float [[EXT]], [[EXT1]]
+// CHECK-NEXT:    [[UNPROMOTION:%.*]] = fptrunc float [[DIV]] to half
+// CHECK-NEXT:    ret half [[UNPROMOTION]]
 //
 _Float16 div(_Float16 a, _Float16 b) {
   return a / b;
@@ -58,9 +67,12 @@
 // CHECK-NEXT:    store half [[A:%.*]], ptr [[A_ADDR]], align 2
 // CHECK-NEXT:    store half [[B:%.*]], ptr [[B_ADDR]], align 2
 // CHECK-NEXT:    [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// CHECK-NEXT:    [[EXT:%.*]] = fpext half [[TMP0]] to float
 // CHECK-NEXT:    [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2
-// CHECK-NEXT:    [[MUL:%.*]] = fmul half [[TMP0]], [[TMP1]]
-// CHECK-NEXT:    ret half [[MUL]]
+// CHECK-NEXT:    [[EXT1:%.*]] = fpext half [[TMP1]] to float
+// CHECK-NEXT:    [[MUL:%.*]] = fmul float [[EXT]], [[EXT1]]
+// CHECK-NEXT:    [[UNPROMOTION:%.*]] = fptrunc float [[MUL]] to half
+// CHECK-NEXT:    ret half [[UNPROMOTION]]
 //
 _Float16 mul(_Float16 a, _Float16 b) {
   return a * b;
@@ -77,13 +89,18 @@
 // CHECK-NEXT:    store half [[C:%.*]], ptr [[C_ADDR]], align 2
 // CHECK-NEXT:    store half [[D:%.*]], ptr [[D_ADDR]], align 2
 // CHECK-NEXT:    [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// CHECK-NEXT:    [[EXT:%.*]] = fpext half [[TMP0]] to float
 // CHECK-NEXT:    [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2
-// CHECK-NEXT:    [[MUL:%.*]] = fmul half [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    [[EXT1:%.*]] = fpext half [[TMP1]] to float
+// CHECK-NEXT:    [[MUL:%.*]] = fmul float [[EXT]], [[EXT1]]
 // CHECK-NEXT:    [[TMP2:%.*]] = load half, ptr [[C_ADDR]], align 2
+// CHECK-NEXT:    [[EXT2:%.*]] = fpext half [[TMP2]] to float
 // CHECK-NEXT:    [[TMP3:%.*]] = load half, ptr [[D_ADDR]], align 2
-// CHECK-NEXT:    [[MUL1:%.*]] = fmul half [[TMP2]], [[TMP3]]
-// CHECK-NEXT:    [[ADD:%.*]] = fadd half [[MUL]], [[MUL1]]
-// CHECK-NEXT:    ret half [[ADD]]
+// CHECK-NEXT:    [[EXT3:%.*]] = fpext half [[TMP3]] to float
+// CHECK-NEXT:    [[MUL4:%.*]] = fmul float [[EXT2]], [[EXT3]]
+// CHECK-NEXT:    [[ADD:%.*]] = fadd float [[MUL]], [[MUL4]]
+// CHECK-NEXT:    [[UNPROMOTION:%.*]] = fptrunc float [[ADD]] to half
+// CHECK-NEXT:    ret half [[UNPROMOTION]]
 //
 _Float16 add_and_mul1(_Float16 a, _Float16 b, _Float16 c, _Float16 d) {
   return a * b + c * d;
@@ -100,13 +117,154 @@
 // CHECK-NEXT:    store half [[C:%.*]], ptr [[C_ADDR]], align 2
 // CHECK-NEXT:    store half [[D:%.*]], ptr [[D_ADDR]], align 2
 // CHECK-NEXT:    [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// CHECK-NEXT:    [[EXT:%.*]] = fpext half [[TMP0]] to float
 // CHECK-NEXT:    [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2
-// CHECK-NEXT:    [[MUL:%.*]] = fmul half 0xH4600, [[TMP1]]
-// CHECK-NEXT:    [[SUB:%.*]] = fsub half [[TMP0]], [[MUL]]
+// CHECK-NEXT:    [[EXT1:%.*]] = fpext half [[TMP1]] to float
+// CHECK-NEXT:    [[MUL:%.*]] = fmul float 6.000000e+00, [[EXT1]]
+// CHECK-NEXT:    [[SUB:%.*]] = fsub float [[EXT]], [[MUL]]
+// CHECK-NEXT:    [[UNPROMOTION:%.*]] = fptrunc float [[SUB]] to half
+// CHECK-NEXT:    [[EXT2:%.*]] = fpext half [[UNPROMOTION]] to float
 // CHECK-NEXT:    [[TMP2:%.*]] = load half, ptr [[C_ADDR]], align 2
-// CHECK-NEXT:    [[ADD:%.*]] = fadd half [[SUB]], [[TMP2]]
-// CHECK-NEXT:    ret half [[ADD]]
+// CHECK-NEXT:    [[EXT3:%.*]] = fpext half [[TMP2]] to float
+// CHECK-NEXT:    [[ADD:%.*]] = fadd float [[EXT2]], [[EXT3]]
+// CHECK-NEXT:    [[UNPROMOTION4:%.*]] = fptrunc float [[ADD]] to half
+// CHECK-NEXT:    ret half [[UNPROMOTION4]]
 //
 _Float16 add_and_mul2(_Float16 a, _Float16 b, _Float16 c, _Float16 d) {
   return (a - 6 * b) + c;
 }
+
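+// Excess-precision compound assignment: both operands are extended to float,
+// the add is performed in float, and the result is truncated back to half
+// before the store.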
+// CHECK-LABEL: @addcompound(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca half, align 2
+// CHECK-NEXT:    [[C_ADDR:%.*]] = alloca half, align 2
+// CHECK-NEXT:    store half [[A:%.*]], ptr [[A_ADDR]], align 2
+// CHECK-NEXT:    store half [[C:%.*]], ptr [[C_ADDR]], align 2
+// CHECK-NEXT:    [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// CHECK-NEXT:    [[EXT:%.*]] = fpext half [[TMP0]] to float
+// CHECK-NEXT:    [[TMP1:%.*]] = load half, ptr [[C_ADDR]], align 2
+// CHECK-NEXT:    [[CONV:%.*]] = fpext half [[TMP1]] to float
+// CHECK-NEXT:    [[ADD:%.*]] = fadd float [[CONV]], [[EXT]]
+// CHECK-NEXT:    [[CONV1:%.*]] = fptrunc float [[ADD]] to half
+// CHECK-NEXT:    store half [[CONV1]], ptr [[C_ADDR]], align 2
+// CHECK-NEXT:    [[TMP2:%.*]] = load half, ptr [[C_ADDR]], align 2
+// CHECK-NEXT:    ret half [[TMP2]]
+//
+_Float16 addcompound(_Float16 a, _Float16 c) {
+  c += a;
+  return c;
+}
+
+// CHECK-LABEL: @mulcompound_int_float16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[C_ADDR:%.*]] = alloca half, align 2
+// CHECK-NEXT:    store i32 [[A:%.*]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store half [[C:%.*]], ptr [[C_ADDR]], align 2
+// CHECK-NEXT:    [[TMP0:%.*]] = load half, ptr [[C_ADDR]], align 2
+// CHECK-NEXT:    [[EXT:%.*]] = fpext half [[TMP0]] to float
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP1]] to float
+// CHECK-NEXT:    [[MUL:%.*]] = fmul float [[CONV]], [[EXT]]
+// CHECK-NEXT:    [[CONV1:%.*]] = fptosi float [[MUL]] to i32
+// CHECK-NEXT:    store i32 [[CONV1]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = load half, ptr [[C_ADDR]], align 2
+// CHECK-NEXT:    ret half [[TMP2]]
+//
+_Float16 mulcompound_int_float16(int a, _Float16 c) {
+  a *= c;
+  return c;
+}
+
+// CHECK-LABEL: @mulcompound_float_float16c(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[C:%.*]] = alloca { half, half }, align 2
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca float, align 4
+// CHECK-NEXT:    store <2 x half> [[C_COERCE:%.*]], ptr [[C]], align 2
+// CHECK-NEXT:    store float [[A:%.*]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[C_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 0
+// CHECK-NEXT:    [[C_REAL:%.*]] = load half, ptr [[C_REALP]], align 2
+// CHECK-NEXT:    [[C_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 1
+// CHECK-NEXT:    [[C_IMAG:%.*]] = load half, ptr [[C_IMAGP]], align 2
+// CHECK-NEXT:    [[CONV:%.*]] = fpext half [[C_REAL]] to float
+// CHECK-NEXT:    [[CONV1:%.*]] = fpext half [[C_IMAG]] to float
+// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[MUL_RL:%.*]] = fmul float [[TMP0]], [[CONV]]
+// CHECK-NEXT:    [[MUL_IR:%.*]] = fmul float [[TMP0]], [[CONV1]]
+// CHECK-NEXT:    store float [[MUL_RL]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[C_REALP2:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 0
+// CHECK-NEXT:    [[C_REAL3:%.*]] = load half, ptr [[C_REALP2]], align 2
+// CHECK-NEXT:    ret half [[C_REAL3]]
+//
+_Float16 mulcompound_float_float16c(float a, _Float16 _Complex c) {
+  a *= c;
+  return c;
+}
+
+// CHECK-LABEL: @RealOp(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[RETVAL:%.*]] = alloca half, align 2
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca half, align 2
+// CHECK-NEXT:    store half [[A:%.*]], ptr [[A_ADDR]], align 2
+// CHECK-NEXT:    [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// CHECK-NEXT:    [[EXT:%.*]] = fpext half [[TMP0]] to float
+// CHECK-NEXT:    store float [[EXT]], ptr [[RETVAL]], align 2
+// CHECK-NEXT:    [[TMP1:%.*]] = load half, ptr [[RETVAL]], align 2
+// CHECK-NEXT:    ret half [[TMP1]]
+//
+_Float16 RealOp(_Float16 a) {
+  return __real a;
+}
+
+// CHECK-LABEL: @RealOp_c(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[RETVAL:%.*]] = alloca half, align 2
+// CHECK-NEXT:    [[A:%.*]] = alloca { half, half }, align 2
+// CHECK-NEXT:    store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2
+// CHECK-NEXT:    [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0
+// CHECK-NEXT:    [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2
+// CHECK-NEXT:    [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1
+// CHECK-NEXT:    [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2
+// CHECK-NEXT:    [[EXT:%.*]] = fpext half [[A_REAL]] to float
+// CHECK-NEXT:    [[EXT1:%.*]] = fpext half [[A_IMAG]] to float
+// CHECK-NEXT:    store float [[EXT]], ptr [[RETVAL]], align 2
+// CHECK-NEXT:    [[TMP0:%.*]] = load half, ptr [[RETVAL]], align 2
+// CHECK-NEXT:    ret half [[TMP0]]
+//
+_Float16 RealOp_c(_Float16 _Complex a) {
+  return __real a;
+}
+
+// CHECK-LABEL: @ImagOp(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[RETVAL:%.*]] = alloca half, align 2
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca half, align 2
+// CHECK-NEXT:    store half [[A:%.*]], ptr [[A_ADDR]], align 2
+// CHECK-NEXT:    [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// CHECK-NEXT:    [[EXT:%.*]] = fpext half [[TMP0]] to float
+// CHECK-NEXT:    store float 0.000000e+00, ptr [[RETVAL]], align 2
+// CHECK-NEXT:    [[TMP1:%.*]] = load half, ptr [[RETVAL]], align 2
+// CHECK-NEXT:    ret half [[TMP1]]
+//
+_Float16 ImagOp(_Float16 a) {
+  return __imag a;
+}
+
+// CHECK-LABEL: @ImagOp_c(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[RETVAL:%.*]] = alloca half, align 2
+// CHECK-NEXT:    [[A:%.*]] = alloca { half, half }, align 2
+// CHECK-NEXT:    store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2
+// CHECK-NEXT:    [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0
+// CHECK-NEXT:    [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2
+// CHECK-NEXT:    [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1
+// CHECK-NEXT:    [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2
+// CHECK-NEXT:    [[EXT:%.*]] = fpext half [[A_REAL]] to float
+// CHECK-NEXT:    [[EXT1:%.*]] = fpext half [[A_IMAG]] to float
+// CHECK-NEXT:    store float [[EXT1]], ptr [[RETVAL]], align 2
+// CHECK-NEXT:    [[TMP0:%.*]] = load half, ptr [[RETVAL]], align 2
+// CHECK-NEXT:    ret half [[TMP0]]
+//
+_Float16 ImagOp_c(_Float16 _Complex a) {
+  return __imag a;
+}
Index: clang/lib/CodeGen/CodeGenFunction.h
===================================================================
--- clang/lib/CodeGen/CodeGenFunction.h
+++ clang/lib/CodeGen/CodeGenFunction.h
@@ -4409,6 +4409,9 @@
   /// EmitLoadOfComplex - Load a complex number from the specified l-value.
   ComplexPairTy EmitLoadOfComplex(LValue src, SourceLocation loc);
 
+  ComplexPairTy EmitPromotedComplexExpr(const Expr *E, QualType PromotionType);
+  llvm::Value *EmitPromotedScalarExpr(const Expr *E, QualType PromotionType);
+
   Address emitAddrOfRealComponent(Address complex, QualType complexType);
   Address emitAddrOfImagComponent(Address complex, QualType complexType);
 
Index: clang/lib/CodeGen/CGExprScalar.cpp
===================================================================
--- clang/lib/CodeGen/CGExprScalar.cpp
+++ clang/lib/CodeGen/CGExprScalar.cpp
@@ -790,21 +790,44 @@
   // Helper functions for fixed point binary operations.
   Value *EmitFixedPointBinOp(const BinOpInfo &Ops);
 
-  BinOpInfo EmitBinOps(const BinaryOperator *E);
+  BinOpInfo EmitBinOps(const BinaryOperator *E,
+                       QualType PromotionTy = QualType());
+
+  Value *EmitPromoted(const Expr *E, QualType PromotionType);
+
   LValue EmitCompoundAssignLValue(const CompoundAssignOperator *E,
-                            Value *(ScalarExprEmitter::*F)(const BinOpInfo &),
-                                  Value *&Result);
+                           Value *(ScalarExprEmitter::*F)(const BinOpInfo &),
+                                 Value *&Result);
 
   Value *EmitCompoundAssign(const CompoundAssignOperator *E,
                             Value *(ScalarExprEmitter::*F)(const BinOpInfo &));
 
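+  // Return the type to which a _Float16 scalar or _Complex _Float16 operand
+  // should be promoted when the target wants excess precision for half
+  // arithmetic, or a null QualType when no promotion is needed.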
+  QualType getPromotionType(QualType Ty) {
+    if (!CGF.getTarget().shouldEmitFloat16WithExcessPrecision())
+      return QualType();
+    if (Ty->isAnyComplexType()) {
+      QualType ElementType = Ty->castAs<ComplexType>()->getElementType();
+      if (ElementType->isFloat16Type())
+        return CGF.getContext().getComplexType(CGF.getContext().FloatTy);
+    } else if (Ty->isFloat16Type()) {
+      return CGF.getContext().FloatTy;
+    }
+    return QualType();
+  }
+
   // Binary operators and binary compound assignment operators.
-#define HANDLEBINOP(OP) \
-  Value *VisitBin ## OP(const BinaryOperator *E) {                         \
-    return Emit ## OP(EmitBinOps(E));                                      \
-  }                                                                        \
-  Value *VisitBin ## OP ## Assign(const CompoundAssignOperator *E) {       \
-    return EmitCompoundAssign(E, &ScalarExprEmitter::Emit ## OP);          \
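+  // Emit the operation at the promoted type when excess precision is
+  // required, then truncate the result back to the expression's type.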
+#define HANDLEBINOP(OP)                                                        \
+  Value *VisitBin##OP(const BinaryOperator *E) {                               \
+    QualType promotionTy = getPromotionType(E->getType());                     \
+    auto result = Emit##OP(EmitBinOps(E, promotionTy));                        \
+    if (result && !promotionTy.isNull())                                       \
+      result = Builder.CreateFPTrunc(result, ConvertType(E->getType()),        \
+                                     "unpromotion");                           \
+    return result;                                                             \
+  }                                                                            \
+  Value *VisitBin##OP##Assign(const CompoundAssignOperator *E) {               \
+    return EmitCompoundAssign(E, &ScalarExprEmitter::Emit##OP);                \
   }
   HANDLEBINOP(Mul)
   HANDLEBINOP(Div)
@@ -3007,31 +3030,41 @@
 
 Value *ScalarExprEmitter::VisitUnaryReal(const UnaryOperator *E) {
   Expr *Op = E->getSubExpr();
+  QualType PromotionType = getPromotionType(Op->getType());
   if (Op->getType()->isAnyComplexType()) {
     // If it's an l-value, load through the appropriate subobject l-value.
     // Note that we have to ask E because Op might be an l-value that
     // this won't work for, e.g. an Obj-C property.
-    if (E->isGLValue())
-      return CGF.EmitLoadOfLValue(CGF.EmitLValue(E),
-                                  E->getExprLoc()).getScalarVal();
-
+    if (E->isGLValue()) {
+      if (!PromotionType.isNull()) {
+        return CGF.EmitPromotedComplexExpr(Op, PromotionType).first;
+      }
+      return CGF.EmitLoadOfLValue(CGF.EmitLValue(E), E->getExprLoc())
+          .getScalarVal();
+    }
     // Otherwise, calculate and project.
     return CGF.EmitComplexExpr(Op, false, true).first;
   }
 
+  if (!PromotionType.isNull())
+    return CGF.EmitPromotedScalarExpr(Op, PromotionType);
   return Visit(Op);
 }
 
 Value *ScalarExprEmitter::VisitUnaryImag(const UnaryOperator *E) {
   Expr *Op = E->getSubExpr();
+  QualType PromotionType = getPromotionType(Op->getType());
   if (Op->getType()->isAnyComplexType()) {
     // If it's an l-value, load through the appropriate subobject l-value.
     // Note that we have to ask E because Op might be an l-value that
     // this won't work for, e.g. an Obj-C property.
-    if (Op->isGLValue())
-      return CGF.EmitLoadOfLValue(CGF.EmitLValue(E),
-                                  E->getExprLoc()).getScalarVal();
-
+    if (Op->isGLValue()) {
+      if (!PromotionType.isNull()) {
+        return CGF.EmitPromotedComplexExpr(Op, PromotionType).second;
+      }
+      return CGF.EmitLoadOfLValue(CGF.EmitLValue(E), E->getExprLoc())
+          .getScalarVal();
+    }
     // Otherwise, calculate and project.
     return CGF.EmitComplexExpr(Op, true, false).second;
   }
@@ -3040,8 +3073,12 @@
   // effects are evaluated, but not the actual value.
   if (Op->isGLValue())
     CGF.EmitLValue(Op);
+  else if (!PromotionType.isNull())
+    CGF.EmitPromotedScalarExpr(Op, PromotionType);
   else
     CGF.EmitScalarExpr(Op, true);
+  if (!PromotionType.isNull())
+    return llvm::Constant::getNullValue(ConvertType(PromotionType));
   return llvm::Constant::getNullValue(ConvertType(E->getType()));
 }
 
@@ -3049,12 +3086,42 @@
 //                           Binary Operators
 //===----------------------------------------------------------------------===//
 
-BinOpInfo ScalarExprEmitter::EmitBinOps(const BinaryOperator *E) {
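+// Emit E evaluated at PromotionType: recognized binary operators recurse so
+// that the whole subtree is computed at the promoted type; anything else is
+// visited normally and widened with fpext.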
+Value *ScalarExprEmitter::EmitPromoted(const Expr *E, QualType PromotionType) {
+  if (auto BO = dyn_cast<BinaryOperator>(E)) {
+    switch (BO->getOpcode()) {
+#define HANDLE_BINOP(OP)                                                       \
+  case BO_##OP:                                                                \
+    return Emit##OP(EmitBinOps(BO, PromotionType));
+      HANDLE_BINOP(Add)
+      HANDLE_BINOP(Sub)
+      HANDLE_BINOP(Mul)
+      HANDLE_BINOP(Div)
+#undef HANDLE_BINOP
+    default:
+      break;
+    }
+  }
+  // Fall back to a single visit of the operand, widening the result to the
+  // promotion type; this also covers binary operators not handled above.
+  auto result = Visit(const_cast<Expr *>(E));
+  if (result && !PromotionType.isNull())
+    result = CGF.Builder.CreateFPExt(result, ConvertType(PromotionType), "ext");
+  return result;
+}
+
+BinOpInfo ScalarExprEmitter::EmitBinOps(const BinaryOperator *E,
+                                        QualType PromotionType) {
   TestAndClearIgnoreResultAssign();
   BinOpInfo Result;
-  Result.LHS = Visit(E->getLHS());
-  Result.RHS = Visit(E->getRHS());
-  Result.Ty  = E->getType();
+  Result.LHS = CGF.EmitPromotedScalarExpr(E->getLHS(), PromotionType);
+  Result.RHS = CGF.EmitPromotedScalarExpr(E->getRHS(), PromotionType);
+  if (!PromotionType.isNull())
+    Result.Ty = PromotionType;
+  else
+    Result.Ty = E->getType();
   Result.Opcode = E->getOpcode();
   Result.FPFeatures = E->getFPFeaturesInEffect(CGF.getLangOpts());
   Result.E = E;
@@ -3062,9 +3129,8 @@
 }
 
 LValue ScalarExprEmitter::EmitCompoundAssignLValue(
-                                              const CompoundAssignOperator *E,
-                        Value *(ScalarExprEmitter::*Func)(const BinOpInfo &),
-                                                   Value *&Result) {
+    const CompoundAssignOperator *E,
+    Value *(ScalarExprEmitter::*Func)(const BinOpInfo &), Value *&Result) {
   QualType LHSTy = E->getLHS()->getType();
   BinOpInfo OpInfo;
 
@@ -3073,8 +3139,18 @@
 
   // Emit the RHS first.  __block variables need to have the rhs evaluated
   // first, plus this should improve codegen a little.
-  OpInfo.RHS = Visit(E->getRHS());
-  OpInfo.Ty = E->getComputationResultType();
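+  // The computation result type, the LHS computation type, and the RHS type
+  // may each independently require promotion for excess precision.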
+  QualType PromotionTypeCR = getPromotionType(E->getComputationResultType());
+  if (PromotionTypeCR.isNull())
+    PromotionTypeCR = E->getComputationResultType();
+  QualType PromotionTypeLHS = getPromotionType(E->getComputationLHSType());
+  QualType PromotionTypeRHS = getPromotionType(E->getRHS()->getType());
+  if (!PromotionTypeRHS.isNull())
+    OpInfo.RHS = CGF.EmitPromotedScalarExpr(E->getRHS(), PromotionTypeRHS);
+  else
+    OpInfo.RHS = Visit(E->getRHS());
+  OpInfo.Ty = PromotionTypeCR;
   OpInfo.Opcode = E->getOpcode();
   OpInfo.FPFeatures = E->getFPFeaturesInEffect(CGF.getLangOpts());
   OpInfo.E = E;
@@ -3153,16 +3229,20 @@
 
   CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, OpInfo.FPFeatures);
   SourceLocation Loc = E->getExprLoc();
-  OpInfo.LHS =
-      EmitScalarConversion(OpInfo.LHS, LHSTy, E->getComputationLHSType(), Loc);
+  if (!PromotionTypeLHS.isNull())
+    OpInfo.LHS =
+        EmitScalarConversion(OpInfo.LHS, LHSTy, PromotionTypeLHS, Loc);
+  else
+    OpInfo.LHS = EmitScalarConversion(OpInfo.LHS, LHSTy,
+                                      E->getComputationLHSType(), Loc);
 
   // Expand the binary operator.
   Result = (this->*Func)(OpInfo);
 
   // Convert the result back to the LHS type,
   // potentially with Implicit Conversion sanitizer check.
-  Result = EmitScalarConversion(Result, E->getComputationResultType(), LHSTy,
-                                Loc, ScalarConversionOpts(CGF.SanOpts));
+  Result = EmitScalarConversion(Result, PromotionTypeCR, LHSTy, Loc,
+                                ScalarConversionOpts(CGF.SanOpts));
 
   if (atomicPHI) {
     llvm::BasicBlock *curBlock = Builder.GetInsertBlock();
@@ -4895,6 +4975,15 @@
       .EmitComplexToScalarConversion(Src, SrcTy, DstTy, Loc);
 }
 
+Value *CodeGenFunction::EmitPromotedScalarExpr(const Expr *E,
+                                               QualType PromotionType) {
+  if (!PromotionType.isNull())
+    return ScalarExprEmitter(*this).EmitPromoted(E, PromotionType);
+  return ScalarExprEmitter(*this).Visit(const_cast<Expr *>(E));
+}
 
 llvm::Value *CodeGenFunction::
 EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
Index: clang/lib/CodeGen/CGExprComplex.cpp
===================================================================
--- clang/lib/CodeGen/CGExprComplex.cpp
+++ clang/lib/CodeGen/CGExprComplex.cpp
@@ -253,7 +253,10 @@
     QualType Ty;  // Computation Type.
   };
 
-  BinOpInfo EmitBinOps(const BinaryOperator *E);
+  BinOpInfo EmitBinOps(const BinaryOperator *E,
+                       QualType PromotionTy = QualType());
+  ComplexPairTy EmitPromoted(const Expr *E, QualType PromotionTy);
+  ComplexPairTy EmitPromotedComplexOperand(const Expr *E, QualType PromotionTy);
   LValue EmitCompoundAssignLValue(const CompoundAssignOperator *E,
                                   ComplexPairTy (ComplexExprEmitter::*Func)
                                   (const BinOpInfo &),
@@ -270,18 +273,45 @@
   ComplexPairTy EmitComplexBinOpLibCall(StringRef LibCallName,
                                         const BinOpInfo &Op);
 
-  ComplexPairTy VisitBinAdd(const BinaryOperator *E) {
-    return EmitBinAdd(EmitBinOps(E));
-  }
-  ComplexPairTy VisitBinSub(const BinaryOperator *E) {
-    return EmitBinSub(EmitBinOps(E));
-  }
-  ComplexPairTy VisitBinMul(const BinaryOperator *E) {
-    return EmitBinMul(EmitBinOps(E));
-  }
-  ComplexPairTy VisitBinDiv(const BinaryOperator *E) {
-    return EmitBinDiv(EmitBinOps(E));
-  }
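+  // Complex-emitter counterpart of ScalarExprEmitter::getPromotionType.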
+  QualType getPromotionType(QualType Ty) {
+    QualType PromotedTy;
+    if (CGF.getTarget().shouldEmitFloat16WithExcessPrecision()) {
+      if (Ty->isRealFloatingType()) {
+        if (Ty->isFloat16Type())
+          PromotedTy = CGF.getContext().FloatTy;
+      } else {
+        assert(Ty->isAnyComplexType() &&
+               "Expecting to promote a complex type!");
+        QualType ElementType = Ty->castAs<ComplexType>()->getElementType();
+        if (ElementType->isFloat16Type())
+          PromotedTy =
+              CGF.getContext().getComplexType(CGF.getContext().FloatTy);
+      }
+    }
+    return PromotedTy;
+  }
+
+#define HANDLEBINOP(OP)                                                        \
+  ComplexPairTy VisitBin##OP(const BinaryOperator *E) {                        \
+    QualType promotionTy = getPromotionType(E->getType());                     \
+    ComplexPairTy result = EmitBin##OP(EmitBinOps(E, promotionTy));            \
+    if (!promotionTy.isNull()) {                                               \
+      llvm::Type *ComplexElementTy = CGF.ConvertType(                          \
+          E->getType()->castAs<ComplexType>()->getElementType());              \
+      if (result.first)                                                        \
+        result.first = Builder.CreateFPTrunc(result.first, ComplexElementTy,   \
+                                             "unpromotion");                   \
+      if (result.second)                                                       \
+        result.second = Builder.CreateFPTrunc(result.second, ComplexElementTy, \
+                                              "unpromotion");                  \
+    }                                                                          \
+    return result;                                                             \
+  }
+
+  HANDLEBINOP(Mul)
+  HANDLEBINOP(Div)
+  HANDLEBINOP(Add)
+  HANDLEBINOP(Sub)
+#undef HANDLEBINOP
 
   ComplexPairTy VisitCXXRewrittenBinaryOperator(CXXRewrittenBinaryOperator *E) {
     return Visit(E->getSemanticForm());
@@ -876,25 +906,81 @@
   return ComplexPairTy(DSTr, DSTi);
 }
 
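+// Emit E with its components widened to the element type of PromotionType.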
+ComplexPairTy ComplexExprEmitter::EmitPromoted(const Expr *E,
+                                               QualType PromotionType) {
+  if (auto BO = dyn_cast<BinaryOperator>(E)) {
+    switch (BO->getOpcode()) {
+#define HANDLE_BINOP(OP)                                                       \
+  case BO_##OP:                                                                \
+    return EmitBin##OP(EmitBinOps(BO, PromotionType));
+      HANDLE_BINOP(Add)
+      HANDLE_BINOP(Sub)
+      HANDLE_BINOP(Mul)
+      HANDLE_BINOP(Div)
+#undef HANDLE_BINOP
+    default:
+      break;
+    }
+  }
+  // Fall back to a single visit; this also covers binary operators that are
+  // not handled above.
+  auto result = Visit(const_cast<Expr *>(E));
+  llvm::Type *ComplexElementTy;
+  if (!PromotionType.isNull())
+    ComplexElementTy = CGF.ConvertType(
+        PromotionType->castAs<ComplexType>()->getElementType());
+  else
+    ComplexElementTy = CGF.ConvertType(
+        E->getType()->castAs<ComplexType>()->getElementType());
+  llvm::Value *Resultr = nullptr;
+  llvm::Value *Resulti = nullptr;
+  if (result.first)
+    Resultr = CGF.Builder.CreateFPExt(result.first, ComplexElementTy, "ext");
+  if (result.second)
+    Resulti = CGF.Builder.CreateFPExt(result.second, ComplexElementTy, "ext");
+  return ComplexPairTy(Resultr, Resulti);
+}
+
+ComplexPairTy CodeGenFunction::EmitPromotedComplexExpr(const Expr *E,
+                                                       QualType DstTy) {
+  return ComplexExprEmitter(*this).EmitPromoted(E, DstTy);
+}
+
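+// Promote one operand of a binary operator: a complex operand is promoted as
+// a whole, while a bare real operand is promoted to the element type and
+// carried as a (real, nullptr) pair.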
+ComplexPairTy
+ComplexExprEmitter::EmitPromotedComplexOperand(const Expr *E,
+                                               QualType PromotionType) {
+  if (E->getType()->isAnyComplexType()) {
+    if (!PromotionType.isNull())
+      return CGF.EmitPromotedComplexExpr(E, PromotionType);
+    return Visit(const_cast<Expr *>(E));
+  }
+  if (!PromotionType.isNull()) {
+    QualType ComplexElementTy =
+        PromotionType->castAs<ComplexType>()->getElementType();
+    return ComplexPairTy(CGF.EmitPromotedScalarExpr(E, ComplexElementTy),
+                         nullptr);
+  }
+  return ComplexPairTy(CGF.EmitScalarExpr(E), nullptr);
+}
+
 ComplexExprEmitter::BinOpInfo
-ComplexExprEmitter::EmitBinOps(const BinaryOperator *E) {
+ComplexExprEmitter::EmitBinOps(const BinaryOperator *E,
+                               QualType PromotionType) {
   TestAndClearIgnoreReal();
   TestAndClearIgnoreImag();
   BinOpInfo Ops;
-  if (E->getLHS()->getType()->isRealFloatingType())
-    Ops.LHS = ComplexPairTy(CGF.EmitScalarExpr(E->getLHS()), nullptr);
-  else
-    Ops.LHS = Visit(E->getLHS());
-  if (E->getRHS()->getType()->isRealFloatingType())
-    Ops.RHS = ComplexPairTy(CGF.EmitScalarExpr(E->getRHS()), nullptr);
-  else
-    Ops.RHS = Visit(E->getRHS());
 
-  Ops.Ty = E->getType();
+  Ops.LHS = EmitPromotedComplexOperand(E->getLHS(), PromotionType);
+  Ops.RHS = EmitPromotedComplexOperand(E->getRHS(), PromotionType);
+  if (!PromotionType.isNull())
+    Ops.Ty = PromotionType;
+  else
+    Ops.Ty = E->getType();
   return Ops;
 }
 
-
 LValue ComplexExprEmitter::
 EmitCompoundAssignLValue(const CompoundAssignOperator *E,
           ComplexPairTy (ComplexExprEmitter::*Func)(const BinOpInfo&),
@@ -911,19 +997,35 @@
   // Load the RHS and LHS operands.
   // __block variables need to have the rhs evaluated first, plus this should
   // improve codegen a little.
-  OpInfo.Ty = E->getComputationResultType();
+  QualType PromotionTypeCR = getPromotionType(E->getComputationResultType());
+  if (PromotionTypeCR.isNull())
+    PromotionTypeCR = E->getComputationResultType();
+  OpInfo.Ty = PromotionTypeCR;
   QualType ComplexElementTy = cast<ComplexType>(OpInfo.Ty)->getElementType();
+  QualType PromotionTypeRHS = getPromotionType(E->getRHS()->getType());
 
   // The RHS should have been converted to the computation type.
   if (E->getRHS()->getType()->isRealFloatingType()) {
-    assert(
-        CGF.getContext()
-            .hasSameUnqualifiedType(ComplexElementTy, E->getRHS()->getType()));
-    OpInfo.RHS = ComplexPairTy(CGF.EmitScalarExpr(E->getRHS()), nullptr);
+    if (!PromotionTypeRHS.isNull()) {
+      OpInfo.RHS = ComplexPairTy(
+          CGF.EmitPromotedScalarExpr(E->getRHS(), PromotionTypeRHS), nullptr);
+    } else {
+      assert(CGF.getContext().hasSameUnqualifiedType(ComplexElementTy,
+                                                     E->getRHS()->getType()));
+      OpInfo.RHS = ComplexPairTy(CGF.EmitScalarExpr(E->getRHS()), nullptr);
+    }
   } else {
-    assert(CGF.getContext()
-               .hasSameUnqualifiedType(OpInfo.Ty, E->getRHS()->getType()));
-    OpInfo.RHS = Visit(E->getRHS());
+    if (!PromotionTypeRHS.isNull()) {
+      OpInfo.RHS = CGF.EmitPromotedComplexExpr(E->getRHS(), PromotionTypeRHS);
+    } else {
+      assert(CGF.getContext().hasSameUnqualifiedType(OpInfo.Ty,
+                                                     E->getRHS()->getType()));
+      OpInfo.RHS = Visit(E->getRHS());
+    }
   }
 
   LValue LHS = CGF.EmitLValue(E->getLHS());
@@ -938,9 +1040,14 @@
     // For floating point real operands we can directly pass the scalar form
     // to the binary operator emission and potentially get more efficient code.
     if (LHSTy->isRealFloatingType()) {
       if (!CGF.getContext().hasSameUnqualifiedType(ComplexElementTy, LHSTy))
         LHSVal = CGF.EmitScalarConversion(LHSVal, LHSTy, ComplexElementTy, Loc);
-      OpInfo.LHS = ComplexPairTy(LHSVal, nullptr);
+      // The conversion above already takes LHSVal to ComplexElementTy, which
+      // is the promoted element type whenever the computation is promoted, so
+      // no separate promotion of the scalar LHS is needed.
+      OpInfo.LHS = ComplexPairTy(LHSVal, nullptr);
     } else {
       OpInfo.LHS = EmitScalarToComplexCast(LHSVal, LHSTy, OpInfo.Ty, Loc);
     }
Index: clang/lib/Basic/Targets/X86.h
===================================================================
--- clang/lib/Basic/Targets/X86.h
+++ clang/lib/Basic/Targets/X86.h
@@ -288,6 +288,10 @@
     return false;
   }
 
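+  // Promote _Float16 arithmetic to float when the type is supported (SSE2)
+  // but there is no native half arithmetic (AVX512-FP16).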
+  bool shouldEmitFloat16WithExcessPrecision() const {
+    return HasFloat16 && !hasLegalHalfType();
+  }
+
   void getTargetDefines(const LangOptions &Opts,
                         MacroBuilder &Builder) const override;
 
Index: clang/lib/Basic/Targets/X86.cpp
===================================================================
--- clang/lib/Basic/Targets/X86.cpp
+++ clang/lib/Basic/Targets/X86.cpp
@@ -239,6 +239,7 @@
       HasAVX512ER = true;
     } else if (Feature == "+avx512fp16") {
       HasAVX512FP16 = true;
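+      // AVX512-FP16 provides native _Float16 arithmetic.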
+      HasLegalHalfType = true;
     } else if (Feature == "+avx512pf") {
       HasAVX512PF = true;
     } else if (Feature == "+avx512dq") {
Index: clang/include/clang/Basic/TargetInfo.h
===================================================================
--- clang/include/clang/Basic/TargetInfo.h
+++ clang/include/clang/Basic/TargetInfo.h
@@ -912,6 +912,8 @@
     return true;
   }
 
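+  /// Whether _Float16 arithmetic should be performed with excess precision,
+  /// i.e. promoted to float and truncated back only at conversion points.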
+  virtual bool shouldEmitFloat16WithExcessPrecision() const { return false; }
+
   /// Specify if mangling based on address space map should be used or
   /// not for language specific address spaces
   bool useAddressSpaceMapMangling() const {
Index: clang/docs/LanguageExtensions.rst
===================================================================
--- clang/docs/LanguageExtensions.rst
+++ clang/docs/LanguageExtensions.rst
@@ -749,7 +749,11 @@
 includes all 64-bit and all recent 32-bit processors. When the target supports
 AVX512-FP16, ``_Float16`` arithmetic is performed using that native support.
 Otherwise, ``_Float16`` arithmetic is performed by promoting to ``float``,
-performing the operation, and then truncating to ``_Float16``.
+performing the operation, and then truncating to ``_Float16``. When doing this
+emulation, Clang defaults to following the C standard's rules for excess
+precision arithmetic: intermediate results are kept in ``float`` rather than
+being truncated after every operation, so results may differ from those of a
+strict operation-by-operation emulation.
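+
+For example, both additions below are performed in ``float``, with a single
+rounding to ``_Float16`` when the result is returned:
+
+.. code-block:: c
+
+  _Float16 sum3(_Float16 a, _Float16 b, _Float16 c) {
+    return a + b + c; // one rounding at the end, not after each addition
+  }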
 
 ``_Float16`` will be supported on more targets as they define ABIs for it.
 
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits