FreddyYe created this revision.
Herald added a subscriber: pengfei.
FreddyYe requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D114059

Files:
  clang/lib/Headers/avx512vlbf16intrin.h
  clang/test/CodeGen/X86/avx512vlbf16-builtins.c

Index: clang/test/CodeGen/X86/avx512vlbf16-builtins.c
===================================================================
--- clang/test/CodeGen/X86/avx512vlbf16-builtins.c
+++ clang/test/CodeGen/X86/avx512vlbf16-builtins.c
@@ -169,6 +169,15 @@
   return _mm_cvtness_sbh(A);
 }
 
+__m128 test_mm_cvtpbh_ps(__m128bh A) {
+  // CHECK-LABEL: @test_mm_cvtpbh_ps
+  // CHECK: sext <4 x i16> %{{.*}} to <4 x i32>
+  // CHECK: @llvm.x86.sse2.pslli.d
+  // CHECK: bitcast <2 x i64> %{{.*}} to <4 x float>
+  // CHECK: ret <4 x float> %{{.*}}
+  return _mm_cvtpbh_ps(A);
+}
+
 __m256 test_mm256_cvtpbh_ps(__m128bh A) {
   // CHECK-LABEL: @test_mm256_cvtpbh_ps
   // CHECK: sext <8 x i16> %{{.*}} to <8 x i32>
@@ -178,6 +187,16 @@
   return _mm256_cvtpbh_ps(A);
 }
 
+__m128 test_mm_maskz_cvtpbh_ps(__mmask8 M, __m128bh A) {
+  // CHECK-LABEL: @test_mm_maskz_cvtpbh_ps
+  // CHECK: sext <4 x i16> %{{.*}} to <4 x i32>
+  // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
+  // CHECK: @llvm.x86.sse2.pslli.d
+  // CHECK: bitcast <2 x i64> %{{.*}} to <4 x float>
+  // CHECK: ret <4 x float> %{{.*}}
+  return _mm_maskz_cvtpbh_ps(M, A);
+}
+
 __m256 test_mm256_maskz_cvtpbh_ps(__mmask8 M, __m128bh A) {
   // CHECK-LABEL: @test_mm256_maskz_cvtpbh_ps
   // CHECK: sext <8 x i16> %{{.*}} to <8 x i32>
@@ -188,6 +207,16 @@
   return _mm256_maskz_cvtpbh_ps(M, A);
 }
 
+__m128 test_mm_mask_cvtpbh_ps(__m128 S, __mmask8 M, __m128bh A) {
+  // CHECK-LABEL: @test_mm_mask_cvtpbh_ps
+  // CHECK: sext <4 x i16> %{{.*}} to <4 x i32>
+  // CHECK: @llvm.x86.sse2.pslli.d
+  // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
+  // CHECK: bitcast <2 x i64> %{{.*}} to <4 x float>
+  // CHECK: ret <4 x float> %{{.*}}
+  return _mm_mask_cvtpbh_ps(S, M, A);
+}
+
 __m256 test_mm256_mask_cvtpbh_ps(__m256 S, __mmask8 M, __m128bh A) {
   // CHECK-LABEL: @test_mm256_mask_cvtpbh_ps
   // CHECK: sext <8 x i16> %{{.*}} to <8 x i32>
Index: clang/lib/Headers/avx512vlbf16intrin.h
===================================================================
--- clang/lib/Headers/avx512vlbf16intrin.h
+++ clang/lib/Headers/avx512vlbf16intrin.h
@@ -420,6 +420,18 @@
   return __R[0];
 }
 
+/// Convert Packed BF16 Data to Packed float Data.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \param __A
+///    A 128-bit vector of [8 x bfloat]; the lower 4 elements are converted.
+/// \returns A 128-bit vector of [4 x float] resulting from conversion of __A.
+static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtpbh_ps(__m128bh __A) {
+  return _mm_castsi128_ps(
+      (__m128i)_mm_slli_epi32((__m128i)_mm_cvtepi16_epi32((__m128i)__A), 16));
+}
+
 /// Convert Packed BF16 Data to Packed float Data.
 ///
 /// \headerfile <x86intrin.h>
@@ -432,6 +444,22 @@
       (__m256i)_mm256_cvtepi16_epi32((__m128i)__A), 16));
 }
 
+/// Convert Packed BF16 Data to Packed float Data using zeroing mask.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \param __U
+///    A 4-bit mask. Elements are zeroed out when the corresponding mask
+///    bit is not set.
+/// \param __A
+///    A 128-bit vector of [8 x bfloat]; the lower 4 elements are converted.
+/// \returns A 128-bit vector of [4 x float] resulting from conversion of __A.
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
+_mm_maskz_cvtpbh_ps(__mmask8 __U, __m128bh __A) {
+  return _mm_castsi128_ps((__m128i)_mm_slli_epi32(
+      (__m128i)_mm_maskz_cvtepi16_epi32((__mmask8)__U, (__m128i)__A), 16));
+}
+
 /// Convert Packed BF16 Data to Packed float Data using zeroing mask.
 ///
 /// \headerfile <x86intrin.h>
@@ -448,6 +476,26 @@
       (__m256i)_mm256_maskz_cvtepi16_epi32((__mmask8)__U, (__m128i)__A), 16));
 }
 
+/// Convert Packed BF16 Data to Packed float Data using merging mask.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \param __S
+///    A 128-bit vector of [4 x float]. Elements are copied from __S when
+///    the corresponding mask bit is not set.
+/// \param __U
+///    A 4-bit mask. Elements are copied from __S when the corresponding
+///    mask bit is not set.
+/// \param __A
+///    A 128-bit vector of [8 x bfloat]; the lower 4 elements are converted.
+/// \returns A 128-bit vector of [4 x float] resulting from conversion of __A.
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
+_mm_mask_cvtpbh_ps(__m128 __S, __mmask8 __U, __m128bh __A) {
+  return _mm_castsi128_ps((__m128i)_mm_mask_slli_epi32(
+      (__m128i)__S, (__mmask8)__U, (__m128i)_mm_cvtepi16_epi32((__m128i)__A),
+      16));
+}
+
 /// Convert Packed BF16 Data to Packed float Data using merging mask.
 ///
 /// \headerfile <x86intrin.h>
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to