[PATCH] D132342: [X86][AVX512FP16] Relax limitation to AVX512FP16 intrinsics. NFCI

Phoebe Wang via Phabricator via cfe-commits Sun, 21 Aug 2022 19:24:27 -0700

pengfei created this revision.
pengfei added reviewers: LuoYuanke, craig.topper, yubing, RKSimon, skan, 
FreddyYe.
Herald added a subscriber: StephenFan.
Herald added a project: All.
pengfei requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.


Since we have enabled the support for `_Float16` on SSE2, we can relax
the limitation for AVX512FP16 now. This helps for user to use AVX512FP16
mixed with unsupported versions, e.g., multiversioning.

Also fix lit fails due to missing const modifier. Found during this change.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D132342

Files:
  clang/include/clang/Basic/BuiltinsX86.def
  clang/lib/Headers/avx512fp16intrin.h
  clang/lib/Headers/immintrin.h


Index: clang/lib/Headers/immintrin.h
===================================================================
--- clang/lib/Headers/immintrin.h
+++ clang/lib/Headers/immintrin.h
@@ -214,17 +214,14 @@
 #include <avx512pfintrin.h>
 #endif
 
-/*
- * FIXME: _Float16 type is legal only when HW support float16 operation.
- * We use __AVX512FP16__ to identify if float16 is supported or not, so
- * when float16 is not supported, the related header is not included.
- *
- */
-#if defined(__AVX512FP16__)
+#if !(defined(_MSC_VER) || defined(__SCE__) || !defined(__SSE2__)) ||          
\
+    __has_feature(modules) || defined(__AVX512FP16__)
 #include <avx512fp16intrin.h>
 #endif
 
-#if defined(__AVX512FP16__) && defined(__AVX512VL__)
+#if !(defined(_MSC_VER) || defined(__SCE__) || !defined(__SSE2__)) ||          
\
+    __has_feature(modules) ||                                                  
\
+    (defined(__AVX512VL__) && defined(__AVX512FP16__))
 #include <avx512vlfp16intrin.h>
 #endif
 
Index: clang/lib/Headers/avx512fp16intrin.h
===================================================================
--- clang/lib/Headers/avx512fp16intrin.h
+++ clang/lib/Headers/avx512fp16intrin.h
@@ -829,7 +829,7 @@
   struct __mm_load_sh_struct {
     _Float16 __u;
   } __attribute__((__packed__, __may_alias__));
-  _Float16 __u = ((struct __mm_load_sh_struct *)__dp)->__u;
+  _Float16 __u = ((const struct __mm_load_sh_struct *)__dp)->__u;
   return (__m128h){__u, 0, 0, 0, 0, 0, 0, 0};
 }
 
@@ -838,13 +838,13 @@
   __m128h src = (__v8hf)__builtin_shufflevector(
       (__v8hf)__W, (__v8hf)_mm_setzero_ph(), 0, 8, 8, 8, 8, 8, 8, 8);
 
-  return (__m128h)__builtin_ia32_loadsh128_mask((__v8hf *)__A, src, __U & 1);
+  return (__m128h)__builtin_ia32_loadsh128_mask((const __v8hf *)__A, src, __U 
& 1);
 }
 
 static __inline__ __m128h __DEFAULT_FN_ATTRS128
 _mm_maskz_load_sh(__mmask8 __U, const void *__A) {
   return (__m128h)__builtin_ia32_loadsh128_mask(
-      (__v8hf *)__A, (__v8hf)_mm_setzero_ph(), __U & 1);
+      (const __v8hf *)__A, (__v8hf)_mm_setzero_ph(), __U & 1);
 }
 
 static __inline__ __m512h __DEFAULT_FN_ATTRS512
Index: clang/include/clang/Basic/BuiltinsX86.def
===================================================================
--- clang/include/clang/Basic/BuiltinsX86.def
+++ clang/include/clang/Basic/BuiltinsX86.def
@@ -1791,7 +1791,7 @@
 TARGET_BUILTIN(__builtin_ia32_cmpph256_mask, "UsV16xV16xIiUs", "ncV:256:", 
"avx512fp16,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_cmpph128_mask, "UcV8xV8xIiUc", "ncV:128:", 
"avx512fp16,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_cmpsh_mask, "UcV8xV8xIiUcIi", "ncV:128:", 
"avx512fp16")
-TARGET_BUILTIN(__builtin_ia32_loadsh128_mask, "V8xV8x*V8xUc", "nV:128:", 
"avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_loadsh128_mask, "V8xV8xC*V8xUc", "nV:128:", 
"avx512fp16")
 TARGET_BUILTIN(__builtin_ia32_storesh128_mask, "vV8x*V8xUc", "nV:128:", 
"avx512fp16")
 
 TARGET_BUILTIN(__builtin_ia32_rcpph128_mask, "V8xV8xV8xUc", "ncV:128:", 
"avx512fp16,avx512vl")

Index: clang/lib/Headers/immintrin.h
===================================================================
--- clang/lib/Headers/immintrin.h
+++ clang/lib/Headers/immintrin.h
@@ -214,17 +214,14 @@
 #include <avx512pfintrin.h>
 #endif
 
-/*
- * FIXME: _Float16 type is legal only when HW support float16 operation.
- * We use __AVX512FP16__ to identify if float16 is supported or not, so
- * when float16 is not supported, the related header is not included.
- *
- */
-#if defined(__AVX512FP16__)
+#if !(defined(_MSC_VER) || defined(__SCE__) || !defined(__SSE2__)) ||          \
+    __has_feature(modules) || defined(__AVX512FP16__)
 #include <avx512fp16intrin.h>
 #endif
 
-#if defined(__AVX512FP16__) && defined(__AVX512VL__)
+#if !(defined(_MSC_VER) || defined(__SCE__) || !defined(__SSE2__)) ||          \
+    __has_feature(modules) ||                                                  \
+    (defined(__AVX512VL__) && defined(__AVX512FP16__))
 #include <avx512vlfp16intrin.h>
 #endif
 
Index: clang/lib/Headers/avx512fp16intrin.h
===================================================================
--- clang/lib/Headers/avx512fp16intrin.h
+++ clang/lib/Headers/avx512fp16intrin.h
@@ -829,7 +829,7 @@
   struct __mm_load_sh_struct {
     _Float16 __u;
   } __attribute__((__packed__, __may_alias__));
-  _Float16 __u = ((struct __mm_load_sh_struct *)__dp)->__u;
+  _Float16 __u = ((const struct __mm_load_sh_struct *)__dp)->__u;
   return (__m128h){__u, 0, 0, 0, 0, 0, 0, 0};
 }
 
@@ -838,13 +838,13 @@
   __m128h src = (__v8hf)__builtin_shufflevector(
       (__v8hf)__W, (__v8hf)_mm_setzero_ph(), 0, 8, 8, 8, 8, 8, 8, 8);
 
-  return (__m128h)__builtin_ia32_loadsh128_mask((__v8hf *)__A, src, __U & 1);
+  return (__m128h)__builtin_ia32_loadsh128_mask((const __v8hf *)__A, src, __U & 1);
 }
 
 static __inline__ __m128h __DEFAULT_FN_ATTRS128
 _mm_maskz_load_sh(__mmask8 __U, const void *__A) {
   return (__m128h)__builtin_ia32_loadsh128_mask(
-      (__v8hf *)__A, (__v8hf)_mm_setzero_ph(), __U & 1);
+      (const __v8hf *)__A, (__v8hf)_mm_setzero_ph(), __U & 1);
 }
 
 static __inline__ __m512h __DEFAULT_FN_ATTRS512
Index: clang/include/clang/Basic/BuiltinsX86.def
===================================================================
--- clang/include/clang/Basic/BuiltinsX86.def
+++ clang/include/clang/Basic/BuiltinsX86.def
@@ -1791,7 +1791,7 @@
 TARGET_BUILTIN(__builtin_ia32_cmpph256_mask, "UsV16xV16xIiUs", "ncV:256:", "avx512fp16,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_cmpph128_mask, "UcV8xV8xIiUc", "ncV:128:", "avx512fp16,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_cmpsh_mask, "UcV8xV8xIiUcIi", "ncV:128:", "avx512fp16")
-TARGET_BUILTIN(__builtin_ia32_loadsh128_mask, "V8xV8x*V8xUc", "nV:128:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_loadsh128_mask, "V8xV8xC*V8xUc", "nV:128:", "avx512fp16")
 TARGET_BUILTIN(__builtin_ia32_storesh128_mask, "vV8x*V8xUc", "nV:128:", "avx512fp16")
 
 TARGET_BUILTIN(__builtin_ia32_rcpph128_mask, "V8xV8xV8xUc", "ncV:128:", "avx512fp16,avx512vl")

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D132342: [X86][AVX512FP16] Relax limitation to AVX512FP16 intrinsics. NFCI

Reply via email to