[PATCH] D105264: [X86] AVX512FP16 instructions enabling 2/6

LuoYuanke via Phabricator via cfe-commits Tue, 10 Aug 2021 05:37:39 -0700

LuoYuanke added inline comments.


================
Comment at: clang/include/clang/Basic/BuiltinsX86.def:1860
+TARGET_BUILTIN(__builtin_ia32_minph512,      "V32xV32xV32xIi", "ncV:512:", 
"avx512fp16")
+
+TARGET_BUILTIN(__builtin_ia32_minph256,      "V16xV16xV16x", "ncV:256:", 
"avx512fp16,avx512vl")
----------------
Why there is no 256 and 128 version for addph, subph, mulph, divph?


================
Comment at: clang/lib/Headers/avx512fp16intrin.h:312
+                                                           __m128h B) {
+  return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_NEQ_US,
+                                _MM_FROUND_CUR_DIRECTION);
----------------
_CMP_NEQ_OS?


================
Comment at: clang/lib/Headers/avx512fp16intrin.h:318
+                                                           __m128h B) {
+  return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_EQ_OQ,
+                                _MM_FROUND_CUR_DIRECTION);
----------------
Why it is OQ not UQ? Ditto for all other ucomi intrinsics.


================
Comment at: clang/lib/Headers/avx512fp16intrin.h:516
+  return (__m512h)__builtin_ia32_maxph512((__v32hf)__A, (__v32hf)__B,
+                                          _MM_FROUND_CUR_DIRECTION);
+}
----------------
Why there is rounding control for max/min operation?


================
Comment at: clang/lib/Headers/avx512fp16intrin.h:669
+  __A = _mm_div_sh(__A, __B);
+  return __builtin_ia32_selectsh_128(__U, __A, __W);
+}
----------------
Will it be combined to one instruction? If __B[0] is 0, and mask[0] is 0, there 
is no exception? 


================
Comment at: clang/lib/Headers/avx512fp16intrin.h:698
+      (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)-1,
+      _MM_FROUND_CUR_DIRECTION);
+}
----------------
Do we have rounding control for min?


================
Comment at: clang/lib/Headers/avx512fp16intrin.h:757
+
+#define _mm_max_round_sh(A, B, R)                                              
\
+  (__m128h) __builtin_ia32_maxsh_round_mask(                                   
\
----------------
This name may be misleading, it means suppress exception. Right?


================
Comment at: clang/lib/Headers/avx512fp16intrin.h:952
 
+#define _mm512_mask_reduce_operator(op)                                        
\
+  __m256h __t1 = (__m256h)_mm512_extractf64x4_pd((__m512d)__W, 0);             
\
----------------
It seems there is no mask for reduce operation.


================
Comment at: clang/lib/Headers/avx512fp16intrin.h:963
+  __m128h __t9 = (__m128h)__builtin_shufflevector((__m128)__t8, (__m128)__t8,  
\
+                                                  1, 0, 2, 3);                 
\
+  __m128h __t10 = __t8 op __t9;                                                
\
----------------
Not sure if there is any room to optimize. The operation for element 2, 3 is 
unnecessary.


================
Comment at: clang/lib/Headers/avx512vlfp16intrin.h:366
 
+#define _mm256_mask_reduce_operator(op)                                        
\
+  __m128h __t1 = (__m128h)_mm256_extracti128_si256((__m256i)__W, 0);           
\
----------------
Ditto


================
Comment at: clang/lib/Headers/avx512vlfp16intrin.h:394
+
+#define _mm256_mask_reduce_operator(op)                                        
\
+  __m128h __t1 = (__m128h)_mm256_extracti128_si256((__m256i)__V, 0);           
\
----------------
Ditto.


================
Comment at: clang/test/CodeGen/X86/avx512fp16-builtins.c:639
+  return _mm512_max_round_ph(__A, __B, _MM_FROUND_NO_EXC);
+}
+__m512h test_mm512_mask_max_round_ph(__m512h __W, __mmask32 __U, __m512h __A, 
__m512h __B) {
----------------
Need a blank line?


================
Comment at: clang/test/CodeGen/X86/avx512fp16-builtins.c:645
+  return _mm512_mask_max_round_ph(__W, __U, __A, __B, _MM_FROUND_NO_EXC);
+}
+__m512h test_mm512_maskz_max_round_ph(__mmask32 __U, __m512h __A, __m512h __B) 
{
----------------
Ditto.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D105264/new/

https://reviews.llvm.org/D105264

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D105264: [X86] AVX512FP16 instructions enabling 2/6

Reply via email to