pengfei created this revision. pengfei added reviewers: LuoYuanke, craig.topper, yubing, RKSimon, skan, FreddyYe. Herald added a subscriber: StephenFan. Herald added a project: All. pengfei requested review of this revision. Herald added a project: clang. Herald added a subscriber: cfe-commits.
Since we have enabled the support for `_Float16` on SSE2, we can relax the limitation for AVX512FP16 now. This helps users mix AVX512FP16 code with versions that lack AVX512FP16 support, e.g., for multiversioning. Also fix lit test failures caused by a missing const modifier, which were found during this change. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D132342 Files: clang/include/clang/Basic/BuiltinsX86.def clang/lib/Headers/avx512fp16intrin.h clang/lib/Headers/immintrin.h Index: clang/lib/Headers/immintrin.h =================================================================== --- clang/lib/Headers/immintrin.h +++ clang/lib/Headers/immintrin.h @@ -214,17 +214,14 @@ #include <avx512pfintrin.h> #endif -/* - * FIXME: _Float16 type is legal only when HW support float16 operation. - * We use __AVX512FP16__ to identify if float16 is supported or not, so - * when float16 is not supported, the related header is not included. - * - */ -#if defined(__AVX512FP16__) +#if !(defined(_MSC_VER) || defined(__SCE__) || !defined(__SSE2__)) || \ + __has_feature(modules) || defined(__AVX512FP16__) #include <avx512fp16intrin.h> #endif -#if defined(__AVX512FP16__) && defined(__AVX512VL__) +#if !(defined(_MSC_VER) || defined(__SCE__) || !defined(__SSE2__)) || \ + __has_feature(modules) || \ + (defined(__AVX512VL__) && defined(__AVX512FP16__)) #include <avx512vlfp16intrin.h> #endif Index: clang/lib/Headers/avx512fp16intrin.h =================================================================== --- clang/lib/Headers/avx512fp16intrin.h +++ clang/lib/Headers/avx512fp16intrin.h @@ -829,7 +829,7 @@ struct __mm_load_sh_struct { _Float16 __u; } __attribute__((__packed__, __may_alias__)); - _Float16 __u = ((struct __mm_load_sh_struct *)__dp)->__u; + _Float16 __u = ((const struct __mm_load_sh_struct *)__dp)->__u; return (__m128h){__u, 0, 0, 0, 0, 0, 0, 0}; } @@ -838,13 +838,13 @@ __m128h src = (__v8hf)__builtin_shufflevector( (__v8hf)__W, (__v8hf)_mm_setzero_ph(), 0, 8, 8, 8, 8, 8, 8, 8); - return 
(__m128h)__builtin_ia32_loadsh128_mask((__v8hf *)__A, src, __U & 1); + return (__m128h)__builtin_ia32_loadsh128_mask((const __v8hf *)__A, src, __U & 1); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_load_sh(__mmask8 __U, const void *__A) { return (__m128h)__builtin_ia32_loadsh128_mask( - (__v8hf *)__A, (__v8hf)_mm_setzero_ph(), __U & 1); + (const __v8hf *)__A, (__v8hf)_mm_setzero_ph(), __U & 1); } static __inline__ __m512h __DEFAULT_FN_ATTRS512 Index: clang/include/clang/Basic/BuiltinsX86.def =================================================================== --- clang/include/clang/Basic/BuiltinsX86.def +++ clang/include/clang/Basic/BuiltinsX86.def @@ -1791,7 +1791,7 @@ TARGET_BUILTIN(__builtin_ia32_cmpph256_mask, "UsV16xV16xIiUs", "ncV:256:", "avx512fp16,avx512vl") TARGET_BUILTIN(__builtin_ia32_cmpph128_mask, "UcV8xV8xIiUc", "ncV:128:", "avx512fp16,avx512vl") TARGET_BUILTIN(__builtin_ia32_cmpsh_mask, "UcV8xV8xIiUcIi", "ncV:128:", "avx512fp16") -TARGET_BUILTIN(__builtin_ia32_loadsh128_mask, "V8xV8x*V8xUc", "nV:128:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_loadsh128_mask, "V8xV8xC*V8xUc", "nV:128:", "avx512fp16") TARGET_BUILTIN(__builtin_ia32_storesh128_mask, "vV8x*V8xUc", "nV:128:", "avx512fp16") TARGET_BUILTIN(__builtin_ia32_rcpph128_mask, "V8xV8xV8xUc", "ncV:128:", "avx512fp16,avx512vl")
Index: clang/lib/Headers/immintrin.h =================================================================== --- clang/lib/Headers/immintrin.h +++ clang/lib/Headers/immintrin.h @@ -214,17 +214,14 @@ #include <avx512pfintrin.h> #endif -/* - * FIXME: _Float16 type is legal only when HW support float16 operation. - * We use __AVX512FP16__ to identify if float16 is supported or not, so - * when float16 is not supported, the related header is not included. - * - */ -#if defined(__AVX512FP16__) +#if !(defined(_MSC_VER) || defined(__SCE__) || !defined(__SSE2__)) || \ + __has_feature(modules) || defined(__AVX512FP16__) #include <avx512fp16intrin.h> #endif -#if defined(__AVX512FP16__) && defined(__AVX512VL__) +#if !(defined(_MSC_VER) || defined(__SCE__) || !defined(__SSE2__)) || \ + __has_feature(modules) || \ + (defined(__AVX512VL__) && defined(__AVX512FP16__)) #include <avx512vlfp16intrin.h> #endif Index: clang/lib/Headers/avx512fp16intrin.h =================================================================== --- clang/lib/Headers/avx512fp16intrin.h +++ clang/lib/Headers/avx512fp16intrin.h @@ -829,7 +829,7 @@ struct __mm_load_sh_struct { _Float16 __u; } __attribute__((__packed__, __may_alias__)); - _Float16 __u = ((struct __mm_load_sh_struct *)__dp)->__u; + _Float16 __u = ((const struct __mm_load_sh_struct *)__dp)->__u; return (__m128h){__u, 0, 0, 0, 0, 0, 0, 0}; } @@ -838,13 +838,13 @@ __m128h src = (__v8hf)__builtin_shufflevector( (__v8hf)__W, (__v8hf)_mm_setzero_ph(), 0, 8, 8, 8, 8, 8, 8, 8); - return (__m128h)__builtin_ia32_loadsh128_mask((__v8hf *)__A, src, __U & 1); + return (__m128h)__builtin_ia32_loadsh128_mask((const __v8hf *)__A, src, __U & 1); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_load_sh(__mmask8 __U, const void *__A) { return (__m128h)__builtin_ia32_loadsh128_mask( - (__v8hf *)__A, (__v8hf)_mm_setzero_ph(), __U & 1); + (const __v8hf *)__A, (__v8hf)_mm_setzero_ph(), __U & 1); } static __inline__ __m512h __DEFAULT_FN_ATTRS512 Index: 
clang/include/clang/Basic/BuiltinsX86.def =================================================================== --- clang/include/clang/Basic/BuiltinsX86.def +++ clang/include/clang/Basic/BuiltinsX86.def @@ -1791,7 +1791,7 @@ TARGET_BUILTIN(__builtin_ia32_cmpph256_mask, "UsV16xV16xIiUs", "ncV:256:", "avx512fp16,avx512vl") TARGET_BUILTIN(__builtin_ia32_cmpph128_mask, "UcV8xV8xIiUc", "ncV:128:", "avx512fp16,avx512vl") TARGET_BUILTIN(__builtin_ia32_cmpsh_mask, "UcV8xV8xIiUcIi", "ncV:128:", "avx512fp16") -TARGET_BUILTIN(__builtin_ia32_loadsh128_mask, "V8xV8x*V8xUc", "nV:128:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_loadsh128_mask, "V8xV8xC*V8xUc", "nV:128:", "avx512fp16") TARGET_BUILTIN(__builtin_ia32_storesh128_mask, "vV8x*V8xUc", "nV:128:", "avx512fp16") TARGET_BUILTIN(__builtin_ia32_rcpph128_mask, "V8xV8xV8xUc", "ncV:128:", "avx512fp16,avx512vl")
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits