[PATCH] D60552: Enable intrinsics of AVX512_BF16, which are supported for BFLOAT16 in Cooper Lake

2019-04-10 Thread Tianle Liu via Phabricator via cfe-commits
liutianle created this revision.
liutianle added reviewers: craig.topper, smaslov, LuoYuanke, wxiao3, 
annita.zhang.
Herald added subscribers: cfe-commits, mgorny.
Herald added a project: clang.

1. Enable infrastructure of AVX512_BF16, which is supported for BFLOAT16 in 
Cooper Lake;
2. Enable intrinsics for VCVTNE2PS2BF16, VCVTNEPS2BF16 and DPBF16PS 
instructions, which are Vector Neural Network Instructions supporting BFLOAT16 
inputs and conversion instructions from IEEE single precision.

For more details about BF16 intrinsic, please refer to the latest ISE document: 
https://software.intel.com/en-us/download/intel-architecture-instruction-set-extensions-programming-reference


Repository:
  rC Clang

https://reviews.llvm.org/D60552

Files:
  docs/ClangCommandLineReference.rst
  include/clang/Basic/BuiltinsX86.def
  include/clang/Driver/Options.td
  lib/Basic/Targets/X86.cpp
  lib/Basic/Targets/X86.h
  lib/CodeGen/CGBuiltin.cpp
  lib/Headers/CMakeLists.txt
  lib/Headers/avx512bf16intrin.h
  lib/Headers/avx512vlbf16intrin.h
  lib/Headers/cpuid.h
  lib/Headers/immintrin.h
  test/CodeGen/attr-target-x86.c
  test/CodeGen/avx512bf16-builtins.c
  test/CodeGen/avx512vlbf16-builtins.c

Index: test/CodeGen/avx512vlbf16-builtins.c
===
--- /dev/null
+++ test/CodeGen/avx512vlbf16-builtins.c
@@ -0,0 +1,142 @@
+//  RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin \
+//  RUN:-target-feature +avx512bf16 -target-feature \
+//  RUN:+avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s
+
+#include 
+
+__m128bh test_mm_cvtne2ps2bf16(__m128 A, __m128 B) {
+  // CHECK-LABEL: @test_mm_cvtne2ps2bf16
+  // CHECK: @llvm.x86.avx512bf16.cvtne2ps2bf16.128
+  return _mm_cvtne2ps_pbh(A, B);
+}
+
+__m128bh test_mm_maskz_cvtne2ps2bf16(__m128 A, __m128 B, __mmask8 U) {
+  // CHECK-LABEL: @test_mm_maskz_cvtne2ps2bf16
+  // CHECK: @llvm.x86.avx512bf16.cvtne2ps2bf16.128
+  // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
+  return _mm_maskz_cvtne2ps_pbh(U, A, B);
+}
+
+__m128bh test_mm_mask_cvtne2ps2bf16(__m128bh C, __mmask8 U, __m128 A, __m128 B) {
+  // CHECK-LABEL: @test_mm_mask_cvtne2ps2bf16
+  // CHECK: @llvm.x86.avx512bf16.cvtne2ps2bf16.128
+  // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
+  return _mm_mask_cvtne2ps_pbh(C, U, A, B);
+}
+
+__m256bh test_mm256_cvtne2ps2bf16(__m256 A, __m256 B) {
+  // CHECK-LABEL: @test_mm256_cvtne2ps2bf16
+  // CHECK: @llvm.x86.avx512bf16.cvtne2ps2bf16.256
+  return _mm256_cvtne2ps_pbh(A, B);
+}
+
+__m256bh test_mm256_maskz_cvtne2ps2bf16(__m256 A, __m256 B, __mmask16 U) {
+  // CHECK-LABEL: @test_mm256_maskz_cvtne2ps2bf16
+  // CHECK: @llvm.x86.avx512bf16.cvtne2ps2bf16.256
+  // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
+  return _mm256_maskz_cvtne2ps_pbh(U, A, B);
+}
+
+__m256bh test_mm256_mask_cvtne2ps2bf16(__m256bh C, __mmask16 U, __m256 A, __m256 B) {
+  // CHECK-LABEL: @test_mm256_mask_cvtne2ps2bf16
+  // CHECK: @llvm.x86.avx512bf16.cvtne2ps2bf16.256
+  // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
+  return _mm256_mask_cvtne2ps_pbh(C, U, A, B);
+}
+
+__m512bh test_mm512_cvtne2ps2bf16(__m512 A, __m512 B) {
+  // CHECK-LABEL: @test_mm512_cvtne2ps2bf16
+  // CHECK: @llvm.x86.avx512bf16.cvtne2ps2bf16.512
+  return _mm512_cvtne2ps_pbh(A, B);
+}
+
+__m512bh test_mm512_maskz_cvtne2ps2bf16(__m512 A, __m512 B, __mmask32 U) {
+  // CHECK-LABEL: @test_mm512_maskz_cvtne2ps2bf16
+  // CHECK: @llvm.x86.avx512bf16.cvtne2ps2bf16.512
+  // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
+  return _mm512_maskz_cvtne2ps_pbh(U, A, B);
+}
+
+__m512bh test_mm512_mask_cvtne2ps2bf16(__m512bh C, __mmask32 U, __m512 A, __m512 B) {
+  // CHECK-LABEL: @test_mm512_mask_cvtne2ps2bf16
+  // CHECK: @llvm.x86.avx512bf16.cvtne2ps2bf16.512
+  // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
+  return _mm512_mask_cvtne2ps_pbh(C, U, A, B);
+}
+
+__m128bh test_mm_cvtneps2bf16(__m128 A) {
+  // CHECK-LABEL: @test_mm_cvtneps2bf16
+  // CHECK: @llvm.x86.avx512bf16.mask.cvtneps2bf16.128
+  return _mm_cvtneps_pbh(A);
+}
+
+__m128bh test_mm_mask_cvtneps2bf16(__m128bh C, __mmask8 U, __m128 A) {
+  // CHECK-LABEL: @test_mm_mask_cvtneps2bf16
+  // CHECK: @llvm.x86.avx512bf16.mask.cvtneps2bf16.
+  return _mm_mask_cvtneps_pbh(C, U, A);
+}
+
+__m128bh test_mm_maskz_cvtneps2bf16(__m128 A, __mmask8 U) {
+  // CHECK-LABEL: @test_mm_maskz_cvtneps2bf16
+  // CHECK: @llvm.x86.avx512bf16.mask.cvtneps2bf16.128
+  return _mm_maskz_cvtneps_pbh(U, A);
+}
+
+__m128bh test_mm256_cvtneps2bf16(__m256 A) {
+  // CHECK-LABEL: @test_mm256_cvtneps2bf16
+  // CHECK: @llvm.x86.avx512bf16.cvtneps2bf16.256
+  return _mm256_cvtneps_pbh(A);
+}
+
+__m128bh test_mm256_mask_cvtneps2bf16(__m128bh C, __mmask8 U, __m256 A) {
+  // CHECK-LABEL: @test_mm256_mask_cvtneps2bf16
+  // CHECK: @llvm.x86.avx512bf16.cvtneps2bf1

[PATCH] D60408: [LibTooling] Extend Transformer to support multiple simultaneous changes.

2019-04-19 Thread Tianle Liu via Phabricator via cfe-commits
liutianle added a comment.

hi ymandel,

When I run "check-all", there are some warning/error in TransformerTest.cpp as 
follow. My version is llvm:0ee120077 and clang:d87ee8e678. Could you please 
have a fix or guild me how to fix it?


In file included from 
myLLVM/llvm/utils/unittest/googlemock/include/gmock/internal/gmock-internal-utils.h:47:0,

  from myLLVM/llvm/utils/unittest/googlemock/include/gmock/gmock-actions.h:46,
  from myLLVM/llvm/utils/unittest/googlemock/include/gmock/gmock.h:58,
  from myLLVM/llvm/tools/clang/unittests/Tooling/TransformerTest.cpp:13:

myLLVM/llvm/utils/unittest/googletest/include/gtest/gtest.h: In instantiation 
of 'testing::AssertionResult testing::internal::CmpHelperEQ(const char*, const 
char*, const T1&, const T2&) [with T1 = long unsigned int; T2 = int]':
myLLVM/llvm/utils/unittest/googletest/include/gtest/gtest.h:1421:23:   required 
from 'static testing::AssertionResult 
testing::internal::EqHelper::Compare(const char*, const 
char*, const T1&, const T2&) [with T1 = long unsigned int; T2 = int; bool 
lhs_is_null_literal = false]'
myLLVM/llvm/tools/clang/unittests/Tooling/TransformerTest.cpp:372:3:   required 
from here
myLLVM/llvm/utils/unittest/googletest/include/gtest/gtest.h:1392:11: warning: 
comparison between signed and unsigned integer expressions [-Wsign-compare]

  if (lhs == rhs) {
  ^

--


Repository:
  rL LLVM

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D60408/new/

https://reviews.llvm.org/D60408



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D60408: [LibTooling] Extend Transformer to support multiple simultaneous changes.

2019-04-22 Thread Tianle Liu via Phabricator via cfe-commits
liutianle added a comment.

In D60408#1474100 , @ymandel wrote:

> In D60408#1473370 , @liutianle wrote:
>
> > hi ymandel,
> >  When I run "check-all", there are some warning/error in 
> > TransformerTest.cpp as follow. My version is llvm:0ee120077 and 
> > clang:d87ee8e678. Could you please have a fix or guild me how to fix it?
> >
> >  
> > ---
> >
> > In file included from 
> > myLLVM/llvm/utils/unittest/googlemock/include/gmock/internal/gmock-internal-utils.h:47:0,
> >
> >   from 
> > myLLVM/llvm/utils/unittest/googlemock/include/gmock/gmock-actions.h:46,
> >   from myLLVM/llvm/utils/unittest/googlemock/include/gmock/gmock.h:58,
> >   from myLLVM/llvm/tools/clang/unittests/Tooling/TransformerTest.cpp:13:
> >
> > myLLVM/llvm/utils/unittest/googletest/include/gtest/gtest.h: In 
> > instantiation of 'testing::AssertionResult 
> > testing::internal::CmpHelperEQ(const char*, const char*, const T1&, const 
> > T2&) [with T1 = long unsigned int; T2 = int]':
> >  myLLVM/llvm/utils/unittest/googletest/include/gtest/gtest.h:1421:23:   
> > required from 'static testing::AssertionResult 
> > testing::internal::EqHelper::Compare(const char*, 
> > const char*, const T1&, const T2&) [with T1 = long unsigned int; T2 = int; 
> > bool lhs_is_null_literal = false]'
> >  myLLVM/llvm/tools/clang/unittests/Tooling/TransformerTest.cpp:372:3:   
> > required from here
> >  myLLVM/llvm/utils/unittest/googletest/include/gtest/gtest.h:1392:11: 
> > warning: comparison between signed and unsigned integer expressions 
> > [-Wsign-compare]
> >
> >   if (lhs == rhs) {
> >   ^
> >
> >  
> > --
>
>
> Sorry about that. Have you sync'd to head? I thought that was fixed with 
> r358745 (from Bjorn Pettersson).


I see. Thank you very much!


Repository:
  rL LLVM

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D60408/new/

https://reviews.llvm.org/D60408



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D60552: [X86] Enable intrinsics of AVX512_BF16, which are supported for BFLOAT16 in Cooper Lake

2019-04-23 Thread Tianle Liu via Phabricator via cfe-commits
liutianle updated this revision to Diff 196195.

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D60552/new/

https://reviews.llvm.org/D60552

Files:
  docs/ClangCommandLineReference.rst
  include/clang/Basic/BuiltinsX86.def
  include/clang/Driver/Options.td
  lib/Basic/Targets/X86.cpp
  lib/Basic/Targets/X86.h
  lib/CodeGen/CGBuiltin.cpp
  lib/Headers/CMakeLists.txt
  lib/Headers/avx512bf16intrin.h
  lib/Headers/avx512vlbf16intrin.h
  lib/Headers/cpuid.h
  lib/Headers/immintrin.h
  test/CodeGen/attr-target-x86.c
  test/CodeGen/avx512bf16-builtins.c
  test/CodeGen/avx512vlbf16-builtins.c

Index: test/CodeGen/avx512vlbf16-builtins.c
===
--- /dev/null
+++ test/CodeGen/avx512vlbf16-builtins.c
@@ -0,0 +1,163 @@
+//  RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin \
+//  RUN:-target-feature +avx512bf16 -target-feature \
+//  RUN:+avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s
+
+#include 
+
+__m128bh test_mm_cvtne2ps2bf16(__m128 A, __m128 B) {
+  // CHECK-LABEL: @test_mm_cvtne2ps2bf16
+  // CHECK: @llvm.x86.avx512bf16.cvtne2ps2bf16.128
+  // CHECK: ret <8 x i16> %{{.*}}
+  return _mm_cvtne2ps_pbh(A, B);
+}
+
+__m128bh test_mm_maskz_cvtne2ps2bf16(__m128 A, __m128 B, __mmask8 U) {
+  // CHECK-LABEL: @test_mm_maskz_cvtne2ps2bf16
+  // CHECK: @llvm.x86.avx512bf16.cvtne2ps2bf16.128
+  // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
+  // CHECK: ret <8 x i16> %{{.*}}
+  return _mm_maskz_cvtne2ps_pbh(U, A, B);
+}
+
+__m128bh test_mm_mask_cvtne2ps2bf16(__m128bh C, __mmask8 U, __m128 A, __m128 B) {
+  // CHECK-LABEL: @test_mm_mask_cvtne2ps2bf16
+  // CHECK: @llvm.x86.avx512bf16.cvtne2ps2bf16.128
+  // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
+  // CHECK: ret <8 x i16> %{{.*}}
+  return _mm_mask_cvtne2ps_pbh(C, U, A, B);
+}
+
+__m256bh test_mm256_cvtne2ps2bf16(__m256 A, __m256 B) {
+  // CHECK-LABEL: @test_mm256_cvtne2ps2bf16
+  // CHECK: @llvm.x86.avx512bf16.cvtne2ps2bf16.256
+  // CHECK: ret <16 x i16> %{{.*}}
+  return _mm256_cvtne2ps_pbh(A, B);
+}
+
+__m256bh test_mm256_maskz_cvtne2ps2bf16(__m256 A, __m256 B, __mmask16 U) {
+  // CHECK-LABEL: @test_mm256_maskz_cvtne2ps2bf16
+  // CHECK: @llvm.x86.avx512bf16.cvtne2ps2bf16.256
+  // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
+  // CHECK: ret <16 x i16> %{{.*}}
+  return _mm256_maskz_cvtne2ps_pbh(U, A, B);
+}
+
+__m256bh test_mm256_mask_cvtne2ps2bf16(__m256bh C, __mmask16 U, __m256 A, __m256 B) {
+  // CHECK-LABEL: @test_mm256_mask_cvtne2ps2bf16
+  // CHECK: @llvm.x86.avx512bf16.cvtne2ps2bf16.256
+  // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
+  // CHECK: ret <16 x i16> %{{.*}}
+  return _mm256_mask_cvtne2ps_pbh(C, U, A, B);
+}
+
+__m512bh test_mm512_cvtne2ps2bf16(__m512 A, __m512 B) {
+  // CHECK-LABEL: @test_mm512_cvtne2ps2bf16
+  // CHECK: @llvm.x86.avx512bf16.cvtne2ps2bf16.512
+  // CHECK: ret <32 x i16> %{{.*}}
+  return _mm512_cvtne2ps_pbh(A, B);
+}
+
+__m512bh test_mm512_maskz_cvtne2ps2bf16(__m512 A, __m512 B, __mmask32 U) {
+  // CHECK-LABEL: @test_mm512_maskz_cvtne2ps2bf16
+  // CHECK: @llvm.x86.avx512bf16.cvtne2ps2bf16.512
+  // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
+  // CHECK: ret <32 x i16> %{{.*}}
+  return _mm512_maskz_cvtne2ps_pbh(U, A, B);
+}
+
+__m512bh test_mm512_mask_cvtne2ps2bf16(__m512bh C, __mmask32 U, __m512 A, __m512 B) {
+  // CHECK-LABEL: @test_mm512_mask_cvtne2ps2bf16
+  // CHECK: @llvm.x86.avx512bf16.cvtne2ps2bf16.512
+  // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
+  // CHECK: ret <32 x i16> %{{.*}}
+  return _mm512_mask_cvtne2ps_pbh(C, U, A, B);
+}
+
+__m128bh test_mm_cvtneps2bf16(__m128 A) {
+  // CHECK-LABEL: @test_mm_cvtneps2bf16
+  // CHECK: @llvm.x86.avx512bf16.mask.cvtneps2bf16.128
+  // CHECK: ret <8 x i16> %{{.*}}
+  return _mm_cvtneps_pbh(A);
+}
+
+__m128bh test_mm_mask_cvtneps2bf16(__m128bh C, __mmask8 U, __m128 A) {
+  // CHECK-LABEL: @test_mm_mask_cvtneps2bf16
+  // CHECK: @llvm.x86.avx512bf16.mask.cvtneps2bf16.
+  // CHECK: ret <8 x i16> %{{.*}}
+  return _mm_mask_cvtneps_pbh(C, U, A);
+}
+
+__m128bh test_mm_maskz_cvtneps2bf16(__m128 A, __mmask8 U) {
+  // CHECK-LABEL: @test_mm_maskz_cvtneps2bf16
+  // CHECK: @llvm.x86.avx512bf16.mask.cvtneps2bf16.128
+  // CHECK: ret <8 x i16> %{{.*}}
+  return _mm_maskz_cvtneps_pbh(U, A);
+}
+
+__m128bh test_mm256_cvtneps2bf16(__m256 A) {
+  // CHECK-LABEL: @test_mm256_cvtneps2bf16
+  // CHECK: @llvm.x86.avx512bf16.cvtneps2bf16.256
+  // CHECK: ret <8 x i16> %{{.*}}
+  return _mm256_cvtneps_pbh(A);
+}
+
+__m128bh test_mm256_mask_cvtneps2bf16(__m128bh C, __mmask8 U, __m256 A) {
+  // CHECK-LABEL: @test_mm256_mask_cvtneps2bf16
+  // CHECK: @llvm.x86.avx512bf16.cvtneps2bf16.256
+  // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
+  // CHECK: ret <8 x i16> %{{.*}}
+  return _mm256_mask_cv

[PATCH] D60552: [X86] Enable intrinsics of AVX512_BF16, which are supported for BFLOAT16 in Cooper Lake

2019-04-23 Thread Tianle Liu via Phabricator via cfe-commits
liutianle added a comment.

@RKSimon I add doxygen based descriptions in header files and return type check 
in test files. Please review again.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D60552/new/

https://reviews.llvm.org/D60552



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D60552: [X86] Enable intrinsics of AVX512_BF16, which are supported for BFLOAT16 in Cooper Lake

2019-04-29 Thread Tianle Liu via Phabricator via cfe-commits
liutianle added a comment.

@RKSimon @craig.topper , could you please review again?


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D60552/new/

https://reviews.llvm.org/D60552



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D60552: [X86] Enable intrinsics of AVX512_BF16, which are supported for BFLOAT16 in Cooper Lake

2019-05-04 Thread Tianle Liu via Phabricator via cfe-commits
liutianle updated this revision to Diff 198168.

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D60552/new/

https://reviews.llvm.org/D60552

Files:
  docs/ClangCommandLineReference.rst
  include/clang/Basic/BuiltinsX86.def
  include/clang/Driver/Options.td
  lib/Basic/Targets/X86.cpp
  lib/Basic/Targets/X86.h
  lib/CodeGen/CGBuiltin.cpp
  lib/Headers/CMakeLists.txt
  lib/Headers/avx512bf16intrin.h
  lib/Headers/avx512vlbf16intrin.h
  lib/Headers/cpuid.h
  lib/Headers/immintrin.h
  test/CodeGen/attr-target-x86.c
  test/CodeGen/avx512bf16-builtins.c
  test/CodeGen/avx512vlbf16-builtins.c
  test/Driver/x86-target-features.c
  test/Preprocessor/x86_target_features.c

Index: test/Preprocessor/x86_target_features.c
===
--- test/Preprocessor/x86_target_features.c
+++ test/Preprocessor/x86_target_features.c
@@ -443,3 +443,18 @@
 // RUN: %clang -target i386-unknown-unknown -march=atom -mrdpid -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=RDPID %s
 
 // RDPID: #define __RDPID__ 1
+
+// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512bf16 -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512BF16 %s
+
+// AVX512BF16: #define __AVX512BF16__ 1
+// AVX512BF16: #define __AVX512BW__ 1
+// AVX512BF16: #define __AVX512VL__ 1
+
+// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512bf16 -mno-avx512bw -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512BF16_NOAVX512BW %s
+
+// AVX512BF16_NOAVX512BW-NOT: #define __AVX512BF16__ 1
+
+// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512bf16 -mno-avx512vl -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512BF16_NOAVX512VL %s
+
+// AVX512BF16_NOAVX512VL-NOT: #define __AVX512BF16__ 1
+
Index: test/Driver/x86-target-features.c
===
--- test/Driver/x86-target-features.c
+++ test/Driver/x86-target-features.c
@@ -178,3 +178,8 @@
 // RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-invpcid %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-INVPCID %s
 // INVPCID: "-target-feature" "+invpcid"
 // NO-INVPCID: "-target-feature" "-invpcid"
+
+// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mavx512bf16 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=AVX512BF16 %s
+// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-avx512bf16 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-AVX512BF16 %s
+// AVX512BF16: "-target-feature" "+avx512bf16"
+// NO-AVX512BF16: "-target-feature" "-avx512bf16"
Index: test/CodeGen/avx512vlbf16-builtins.c
===
--- /dev/null
+++ test/CodeGen/avx512vlbf16-builtins.c
@@ -0,0 +1,163 @@
+//  RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin \
+//  RUN:-target-feature +avx512bf16 -target-feature \
+//  RUN:+avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s
+
+#include 
+
+__m128bh test_mm_cvtne2ps2bf16(__m128 A, __m128 B) {
+  // CHECK-LABEL: @test_mm_cvtne2ps2bf16
+  // CHECK: @llvm.x86.avx512bf16.cvtne2ps2bf16.128
+  // CHECK: ret <8 x i16> %{{.*}}
+  return _mm_cvtne2ps_pbh(A, B);
+}
+
+__m128bh test_mm_maskz_cvtne2ps2bf16(__m128 A, __m128 B, __mmask8 U) {
+  // CHECK-LABEL: @test_mm_maskz_cvtne2ps2bf16
+  // CHECK: @llvm.x86.avx512bf16.cvtne2ps2bf16.128
+  // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
+  // CHECK: ret <8 x i16> %{{.*}}
+  return _mm_maskz_cvtne2ps_pbh(U, A, B);
+}
+
+__m128bh test_mm_mask_cvtne2ps2bf16(__m128bh C, __mmask8 U, __m128 A, __m128 B) {
+  // CHECK-LABEL: @test_mm_mask_cvtne2ps2bf16
+  // CHECK: @llvm.x86.avx512bf16.cvtne2ps2bf16.128
+  // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
+  // CHECK: ret <8 x i16> %{{.*}}
+  return _mm_mask_cvtne2ps_pbh(C, U, A, B);
+}
+
+__m256bh test_mm256_cvtne2ps2bf16(__m256 A, __m256 B) {
+  // CHECK-LABEL: @test_mm256_cvtne2ps2bf16
+  // CHECK: @llvm.x86.avx512bf16.cvtne2ps2bf16.256
+  // CHECK: ret <16 x i16> %{{.*}}
+  return _mm256_cvtne2ps_pbh(A, B);
+}
+
+__m256bh test_mm256_maskz_cvtne2ps2bf16(__m256 A, __m256 B, __mmask16 U) {
+  // CHECK-LABEL: @test_mm256_maskz_cvtne2ps2bf16
+  // CHECK: @llvm.x86.avx512bf16.cvtne2ps2bf16.256
+  // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
+  // CHECK: ret <16 x i16> %{{.*}}
+  return _mm256_maskz_cvtne2ps_pbh(U, A, B);
+}
+
+__m256bh test_mm256_mask_cvtne2ps2bf16(__m256bh C, __mmask16 U, __m256 A, __m256 B) {
+  // CHECK-LABEL: @test_mm256_mask_cvtne2ps2bf16
+  // CHECK: @llvm.x86.avx512bf16.cvtne2ps2bf16.256
+  // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
+  // CHECK: ret <16 x i16> %{{.*}}
+  return _mm256_mask_cvtne2ps_pbh(C, U, A, B);
+}
+
+__m512bh test_mm512_cvtne2ps2bf16(__m512 A, __m512 B) {
+  // CHECK-LABEL: @test_mm512_cvtne2ps2bf16
+  // CHECK: @llvm.x86.avx512bf16.cvtne2ps2b