llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang Author: Malay Sanghi (MalaySanghi) <details> <summary>Changes</summary> Ref.: https://cdrdv2.intel.com/v1/dl/getContent/828965 --- Patch is 494.14 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/102592.diff 31 Files Affected: - (modified) clang/include/clang/Basic/BuiltinsX86.def (+30) - (modified) clang/include/clang/Basic/BuiltinsX86_64.def (+6) - (modified) clang/lib/Headers/CMakeLists.txt (+2) - (added) clang/lib/Headers/avx10_2_512satcvtdsintrin.h (+302) - (added) clang/lib/Headers/avx10_2satcvtdsintrin.h (+453) - (modified) clang/lib/Headers/immintrin.h (+8) - (modified) clang/lib/Sema/SemaX86.cpp (+26) - (added) clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-errors.c (+52) - (added) clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-x64-error.c (+76) - (added) clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-x64.c (+184) - (added) clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins.c (+183) - (added) clang/test/CodeGen/X86/avx10_2satcvtds-builtins-errors.c (+57) - (added) clang/test/CodeGen/X86/avx10_2satcvtds-builtins-x64.c (+223) - (added) clang/test/CodeGen/X86/avx10_2satcvtds-builtins.c (+220) - (modified) llvm/include/llvm/IR/IntrinsicsX86.td (+100) - (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+21-2) - (modified) llvm/lib/Target/X86/X86ISelLowering.h (+18) - (modified) llvm/lib/Target/X86/X86InstrAVX10.td (+311) - (modified) llvm/lib/Target/X86/X86InstrFragmentsSIMD.td (+12) - (modified) llvm/lib/Target/X86/X86IntrinsicsInfo.h (+65-1) - (added) llvm/test/CodeGen/X86/avx10_2_512satcvtds-intrinsics.ll (+548) - (added) llvm/test/CodeGen/X86/avx10_2fptosi_satcvtds.ll (+115) - (added) llvm/test/CodeGen/X86/avx10_2satcvtds-intrinsics.ll (+1098) - (added) llvm/test/CodeGen/X86/avx10_2satcvtds-x64-intrinsics.ll (+58) - (added) llvm/test/MC/Disassembler/X86/avx10.2-satcvtds-32.txt (+1043) - (added) llvm/test/MC/Disassembler/X86/avx10.2-satcvtds-64.txt (+1171) - (added) llvm/test/MC/X86/avx10_2satcvtds-32-att.s (+1042) - (added) llvm/test/MC/X86/avx10_2satcvtds-32-intel.s (+1042) - (added) llvm/test/MC/X86/avx10_2satcvtds-64-att.s (+1170) - (added) llvm/test/MC/X86/avx10_2satcvtds-64-intel.s (+1170) - (modified) llvm/test/TableGen/x86-fold-tables.inc (+160) ``````````diff diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def index a696cf117908e2..a8639c341d0a43 100644 --- a/clang/include/clang/Basic/BuiltinsX86.def +++ b/clang/include/clang/Basic/BuiltinsX86.def @@ -2122,6 +2122,36 @@ TARGET_BUILTIN(__builtin_ia32_vpdpwuud256, "V8iV8iV8iV8i", "nV:256:", "avxvnniin TARGET_BUILTIN(__builtin_ia32_vpdpwuuds128, "V4iV4iV4iV4i", "nV:128:", "avxvnniint16|avx10.2-256") TARGET_BUILTIN(__builtin_ia32_vpdpwuuds256, "V8iV8iV8iV8i", "nV:256:", "avxvnniint16|avx10.2-256") +// AVX10.2 SATCVT-DS +TARGET_BUILTIN(__builtin_ia32_vcvttssd2si32, "iV2dIi", "ncV:128:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vcvttssd2usi32, "UiV2dIi", "ncV:128:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vcvttsss2si32, "iV4fIi", "ncV:128:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vcvttsss2usi32, "UiV4fIi", "ncV:128:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vcvttpd2dqs128_mask, "V4iV2dV4iUc", "nV:128:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vcvttpd2dqs256_round_mask, "V4iV4dV4iUcIi", "nV:256:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vcvttpd2dqs512_round_mask, "V8iV8dV8iUcIi", "nV:512:", "avx10.2-512") +TARGET_BUILTIN(__builtin_ia32_vcvttpd2udqs128_mask, "V4iV2dV4iUc", "nV:128:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vcvttpd2udqs256_round_mask, "V4iV4dV4iUcIi", "nV:256:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vcvttpd2udqs512_round_mask, "V8iV8dV8iUcIi", "nV:512:", "avx10.2-512") +TARGET_BUILTIN(__builtin_ia32_vcvttpd2qqs128_mask, "V2OiV2dV2OiUc", "nV:128:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vcvttpd2qqs256_round_mask, "V4OiV4dV4OiUcIi", "nV:256:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vcvttpd2qqs512_round_mask, "V8OiV8dV8OiUcIi", "nV:512:", "avx10.2-512") +TARGET_BUILTIN(__builtin_ia32_vcvttpd2uqqs128_mask, "V2OiV2dV2OiUc", "nV:128:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vcvttpd2uqqs256_round_mask, "V4OiV4dV4OiUcIi", "nV:256:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vcvttpd2uqqs512_round_mask, "V8OiV8dV8OiUcIi", "nV:512:", "avx10.2-512") +TARGET_BUILTIN(__builtin_ia32_vcvttps2dqs128_mask, "V4iV4fV4iUc", "nV:128:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vcvttps2dqs256_round_mask, "V8iV8fV8iUcIi", "nV:256:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vcvttps2dqs512_round_mask, "V16iV16fV16iUsIi", "nV:512:", "avx10.2-512") +TARGET_BUILTIN(__builtin_ia32_vcvttps2udqs128_mask, "V4iV4fV4iUc", "nV:128:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vcvttps2udqs256_round_mask, "V8iV8fV8iUcIi", "nV:256:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vcvttps2udqs512_round_mask, "V16iV16fV16iUsIi", "nV:512:", "avx10.2-512") +TARGET_BUILTIN(__builtin_ia32_vcvttps2qqs128_mask, "V2OiV4fV2OiUc", "nV:128:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vcvttps2qqs256_round_mask, "V4OiV4fV4OiUcIi", "nV:256:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vcvttps2qqs512_round_mask, "V8OiV8fV8OiUcIi", "nV:512:", "avx10.2-512") +TARGET_BUILTIN(__builtin_ia32_vcvttps2uqqs128_mask, "V2OiV4fV2OiUc", "nV:128:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vcvttps2uqqs256_round_mask, "V4OiV4fV4OiUcIi", "nV:256:", "avx10.2-256") +TARGET_BUILTIN(__builtin_ia32_vcvttps2uqqs512_round_mask, "V8OiV8fV8OiUcIi", "nV:512:", "avx10.2-512") + // AVX-NE-CONVERT TARGET_BUILTIN(__builtin_ia32_vbcstnebf162ps128, "V4fyC*", "nV:128:", "avxneconvert") TARGET_BUILTIN(__builtin_ia32_vbcstnebf162ps256, "V8fyC*", "nV:256:", "avxneconvert") diff --git a/clang/include/clang/Basic/BuiltinsX86_64.def b/clang/include/clang/Basic/BuiltinsX86_64.def index 5e00916d4b25ae..ed9b17b8bd7b8e 100644 --- a/clang/include/clang/Basic/BuiltinsX86_64.def +++ b/clang/include/clang/Basic/BuiltinsX86_64.def @@ -99,6 +99,12 @@ TARGET_BUILTIN(__builtin_ia32_vcvttsh2si64, "OiV8xIi", "ncV:128:", "avx512fp16") TARGET_BUILTIN(__builtin_ia32_vcvttsh2usi64, "UOiV8xIi", "ncV:128:", "avx512fp16") TARGET_BUILTIN(__builtin_ia32_directstore_u64, "vULi*ULi", "n", "movdiri") +// AVX10.2 SATCVT-DS +TARGET_BUILTIN(__builtin_ia32_vcvttssd2si64, "OiV2dIi", "ncV:128:", "avx10.2-512") +TARGET_BUILTIN(__builtin_ia32_vcvttssd2usi64, "UOiV2dIi", "ncV:128:", "avx10.2-512") +TARGET_BUILTIN(__builtin_ia32_vcvttsss2si64, "OiV4fIi", "ncV:128:", "avx10.2-512") +TARGET_BUILTIN(__builtin_ia32_vcvttsss2usi64, "UOiV4fIi", "ncV:128:", "avx10.2-512") + // UINTR TARGET_BUILTIN(__builtin_ia32_clui, "v", "n", "uintr") TARGET_BUILTIN(__builtin_ia32_stui, "v", "n", "uintr") diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt index b61aeca6bbc910..9981290628697c 100644 --- a/clang/lib/Headers/CMakeLists.txt +++ b/clang/lib/Headers/CMakeLists.txt @@ -150,9 +150,11 @@ set(x86_files avx10_2_512minmaxintrin.h avx10_2_512niintrin.h avx10_2_512satcvtintrin.h + avx10_2_512satcvtdsintrin.h avx10_2minmaxintrin.h avx10_2niintrin.h avx10_2satcvtintrin.h + avx10_2satcvtdsintrin.h avx2intrin.h avx512bf16intrin.h avx512bitalgintrin.h diff --git a/clang/lib/Headers/avx10_2_512satcvtdsintrin.h b/clang/lib/Headers/avx10_2_512satcvtdsintrin.h new file mode 100644 index 00000000000000..e8b815653c3d6e --- /dev/null +++ b/clang/lib/Headers/avx10_2_512satcvtdsintrin.h @@ -0,0 +1,302 @@ +/*===----- avx10_2_512satcvtdsintrin.h - AVX10_2_512SATCVTDS intrinsics ----=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error \ + "Never use <avx10_2_512satcvtdsintrin.h> directly; include <immintrin.h> instead." +#endif + +#ifndef __AVX10_2_512SATCVTDSINTRIN_H +#define __AVX10_2_512SATCVTDSINTRIN_H + +/* Define the default attributes for the functions in this file. */ +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-512"), \ + __min_vector_width__(512))) + +// 512 bit : Double -> Int +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_cvttspd_epi32(__m512d A) { + return ((__m256i)__builtin_ia32_vcvttpd2dqs512_round_mask( + (__v8df)A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1, + _MM_FROUND_CUR_DIRECTION)); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm512_mask_cvttspd_epi32(__m256i W, __mmask8 U, __m512d A) { + return ((__m256i)__builtin_ia32_vcvttpd2dqs512_round_mask( + (__v8df)A, (__v8si)W, U, _MM_FROUND_CUR_DIRECTION)); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm512_maskz_cvttspd_epi32(__mmask8 U, __m512d A) { + return ((__m256i)__builtin_ia32_vcvttpd2dqs512_round_mask( + (__v8df)A, (__v8si)_mm256_setzero_si256(), U, _MM_FROUND_CUR_DIRECTION)); +} + +#define _mm512_cvtts_roundpd_epi32(A, R) \ + ((__m256i)__builtin_ia32_vcvttpd2dqs512_round_mask( \ + (__v8df)(__m512d)(A), (__v8si)_mm256_undefined_si256(), (__mmask8) - 1, \ + (const int)(R))) + +#define _mm512_mask_cvtts_roundpd_epi32(W, U, A, R) \ + ((__m256i)__builtin_ia32_vcvttpd2dqs512_round_mask( \ + (__v8df)(__m512d)(A), (__v8si)(__m256i)(W), (__mmask8)(U), \ + (const int)(R))) + +#define _mm512_maskz_cvtts_roundpd_epi32(U, A, R) \ + ((__m256i)__builtin_ia32_vcvttpd2dqs512_round_mask( \ + (__v8df)(__m512d)(A), (__v8si)_mm256_setzero_si256(), (__mmask8)(U), \ + (const int)(R))) + +// 512 bit : Double -> uInt +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_cvttspd_epu32(__m512d A) { + return ((__m256i)__builtin_ia32_vcvttpd2udqs512_round_mask( + (__v8df)A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1, + _MM_FROUND_CUR_DIRECTION)); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm512_mask_cvttspd_epu32(__m256i W, __mmask8 U, __m512d A) { + return ((__m256i)__builtin_ia32_vcvttpd2udqs512_round_mask( + (__v8df)A, (__v8si)W, U, _MM_FROUND_CUR_DIRECTION)); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm512_maskz_cvttspd_epu32(__mmask8 U, __m512d A) { + return ((__m256i)__builtin_ia32_vcvttpd2udqs512_round_mask( + (__v8df)A, (__v8si)_mm256_setzero_si256(), U, _MM_FROUND_CUR_DIRECTION)); +} + +#define _mm512_cvtts_roundpd_epu32(A, R) \ + ((__m256i)__builtin_ia32_vcvttpd2udqs512_round_mask( \ + (__v8df)(__m512d)(A), (__v8si)_mm256_undefined_si256(), (__mmask8) - 1, \ + (const int)(R))) + +#define _mm512_mask_cvtts_roundpd_epu32(W, U, A, R) \ + ((__m256i)__builtin_ia32_vcvttpd2udqs512_round_mask( \ + (__v8df)(__m512d)(A), (__v8si)(__m256i)(W), (__mmask8)(U), \ + (const int)(R))) + +#define _mm512_maskz_cvtts_roundpd_epu32(U, A, R) \ + ((__m256i)__builtin_ia32_vcvttpd2udqs512_round_mask( \ + (__v8df)(__m512d)(A), (__v8si)_mm256_setzero_si256(), (__mmask8)(U), \ + (const int)(R))) + +#ifdef __x86_64__ +// 512 bit : Double -> Long + +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttspd_epi64(__m512d A) { + return ((__m512i)__builtin_ia32_vcvttpd2qqs512_round_mask( + (__v8df)A, (__v8di)_mm512_undefined_epi32(), (__mmask8)-1, + _MM_FROUND_CUR_DIRECTION)); +} +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_cvttspd_epi64(__m512i W, __mmask8 U, __m512d A) { + return ((__m512i)__builtin_ia32_vcvttpd2qqs512_round_mask( + (__v8df)A, (__v8di)W, U, _MM_FROUND_CUR_DIRECTION)); +} +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_cvttspd_epi64(__mmask8 U, __m512d A) { + return ((__m512i)__builtin_ia32_vcvttpd2qqs512_round_mask( + (__v8df)A, (__v8di)_mm512_setzero_si512(), U, _MM_FROUND_CUR_DIRECTION)); +} + +#define _mm512_cvtts_roundpd_epi64(A, R) \ + ((__m512i)__builtin_ia32_vcvttpd2qqs512_round_mask( \ + (__v8df)(__m512d)(A), (__v8di)_mm512_undefined_epi32(), (__mmask8) - 1, \ + (const int)(R))) + +#define _mm512_mask_cvtts_roundpd_epi64(W, U, A, R) \ + ((__m512i)__builtin_ia32_vcvttpd2qqs512_round_mask( \ + (__v8df)(__m512d)(A), (__v8di)(__m512i)(W), (__mmask8)(U), \ + (const int)(R))) + +#define _mm512_maskz_cvtts_roundpd_epi64(U, A, R) \ + ((__m512i)__builtin_ia32_vcvttpd2qqs512_round_mask( \ + (__v8df)(__m512d)(A), (__v8di)_mm512_setzero_si512(), (__mmask8)(U), \ + (const int)(R))) + +// 512 bit : Double -> ULong + +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttspd_epu64(__m512d A) { + return ((__m512i)__builtin_ia32_vcvttpd2uqqs512_round_mask( + (__v8df)A, (__v8di)_mm512_undefined_epi32(), (__mmask8)-1, + _MM_FROUND_CUR_DIRECTION)); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_cvttspd_epu64(__m512i W, __mmask8 U, __m512d A) { + return ((__m512i)__builtin_ia32_vcvttpd2uqqs512_round_mask( + (__v8df)A, (__v8di)W, U, _MM_FROUND_CUR_DIRECTION)); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_cvttspd_epu64(__mmask8 U, __m512d A) { + return ((__m512i)__builtin_ia32_vcvttpd2uqqs512_round_mask( + (__v8df)A, (__v8di)_mm512_setzero_si512(), U, _MM_FROUND_CUR_DIRECTION)); +} + +#define _mm512_cvtts_roundpd_epu64(A, R) \ + ((__m512i)__builtin_ia32_vcvttpd2uqqs512_round_mask( \ + (__v8df)(__m512d)(A), (__v8di)_mm512_undefined_epi32(), (__mmask8) - 1, \ + (const int)(R))) + +#define _mm512_mask_cvtts_roundpd_epu64(W, U, A, R) \ + ((__m512i)__builtin_ia32_vcvttpd2uqqs512_round_mask( \ + (__v8df)(__m512d)(A), (__v8di)(__m512i)(W), (__mmask8)(U), \ + (const int)(R))) + +#define _mm512_maskz_cvtts_roundpd_epu64(U, A, R) \ + ((__m512i)__builtin_ia32_vcvttpd2uqqs512_round_mask( \ + (__v8df)(__m512d)(A), (__v8di)_mm512_setzero_si512(), (__mmask8)(U), \ + (const int)(R))) + +#endif + +// 512 bit: Float -> int +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttsps_epi32(__m512 A) { + return ((__m512i)__builtin_ia32_vcvttps2dqs512_round_mask( + (__v16sf)(A), (__v16si)_mm512_undefined_epi32(), (__mmask16)-1, + _MM_FROUND_CUR_DIRECTION)); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_cvttsps_epi32(__m512i W, __mmask16 U, __m512 A) { + return ((__m512i)__builtin_ia32_vcvttps2dqs512_round_mask( + (__v16sf)(A), (__v16si)(W), U, _MM_FROUND_CUR_DIRECTION)); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_cvttsps_epi32(__mmask16 U, __m512 A) { + return ((__m512i)__builtin_ia32_vcvttps2dqs512_round_mask( + (__v16sf)(A), (__v16si)_mm512_setzero_si512(), U, + _MM_FROUND_CUR_DIRECTION)); +} + +#define _mm512_cvtts_roundps_epi32(A, R) \ + ((__m512i)__builtin_ia32_vcvttps2dqs512_round_mask( \ + (__v16sf)(__m512)(A), (__v16si)_mm512_undefined_epi32(), \ + (__mmask16) - 1, (const int)(R))) + +#define _mm512_mask_cvtts_roundps_epi32(W, U, A, R) \ + ((__m512i)__builtin_ia32_vcvttps2dqs512_round_mask( \ + (__v16sf)(__m512)(A), (__v16si)(__m512i)(W), (__mmask16)(U), \ + (const int)(R))) + +#define _mm512_maskz_cvtts_roundps_epi32(U, A, R) \ + ((__m512i)__builtin_ia32_vcvttps2dqs512_round_mask( \ + (__v16sf)(__m512)(A), (__v16si)_mm512_setzero_si512(), (__mmask16)(U), \ + (const int)(R))) + +// 512 bit: Float -> uint +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttsps_epu32(__m512 A) { + return ((__m512i)__builtin_ia32_vcvttps2udqs512_round_mask( + (__v16sf)(A), (__v16si)_mm512_undefined_epi32(), (__mmask16)-1, + _MM_FROUND_CUR_DIRECTION)); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_cvttsps_epu32(__m512i W, __mmask16 U, __m512 A) { + return ((__m512i)__builtin_ia32_vcvttps2udqs512_round_mask( + (__v16sf)(A), (__v16si)(W), U, _MM_FROUND_CUR_DIRECTION)); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_cvttsps_epu32(__mmask16 U, __m512 A) { + return ((__m512i)__builtin_ia32_vcvttps2udqs512_round_mask( + (__v16sf)(A), (__v16si)_mm512_setzero_si512(), U, + _MM_FROUND_CUR_DIRECTION)); +} + +#define _mm512_cvtts_roundps_epu32(A, R) \ + ((__m512i)__builtin_ia32_vcvttps2udqs512_round_mask( \ + (__v16sf)(__m512)(A), (__v16si)_mm512_undefined_epi32(), \ + (__mmask16) - 1, (const int)(R))) + +#define _mm512_mask_cvtts_roundps_epu32(W, U, A, R) \ + ((__m512i)__builtin_ia32_vcvttps2udqs512_round_mask( \ + (__v16sf)(__m512)(A), (__v16si)(__m512i)(W), (__mmask16)(U), \ + (const int)(R))) + +#define _mm512_maskz_cvtts_roundps_epu32(U, A, R) \ + ((__m512i)__builtin_ia32_vcvttps2udqs512_round_mask( \ + (__v16sf)(__m512)(A), (__v16si)_mm512_setzero_si512(), (__mmask16)(U), \ + (const int)(R))) + +#ifdef __x86_64__ +// 512 bit : float -> long +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttsps_epi64(__m256 A) { + return ((__m512i)__builtin_ia32_vcvttps2qqs512_round_mask( + (__v8sf)A, (__v8di)_mm512_undefined_epi32(), (__mmask8)-1, + _MM_FROUND_CUR_DIRECTION)); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_cvttsps_epi64(__m512i W, __mmask8 U, __m256 A) { + return ((__m512i)__builtin_ia32_vcvttps2qqs512_round_mask( + (__v8sf)A, (__v8di)W, U, _MM_FROUND_CUR_DIRECTION)); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_cvttsps_epi64(__mmask8 U, __m256 A) { + return ((__m512i)__builtin_ia32_vcvttps2qqs512_round_mask( + (__v8sf)A, (__v8di)_mm512_setzero_si512(), U, _MM_FROUND_CUR_DIRECTION)); +} + +#define _mm512_cvtts_roundps_epi64(A, R) \ + ((__m512i)__builtin_ia32_vcvttps2qqs512_round_mask( \ + (__v8sf)(__m256)(A), (__v8di)_mm512_undefined_epi32(), (__mmask8) - 1, \ + (const int)(R))) + +#define _mm512_mask_cvtts_roundps_epi64(W, U, A, R) \ + ((__m512i)__builtin_ia32_vcvttps2qqs512_round_mask( \ + (__v8sf)(__m256)(A), (__v8di)(__m512i)(W), (__mmask8)(U), \ + (const int)(R))) + +#define _mm512_maskz_cvtts_roundps_epi64(U, A, R) \ + ((__m512i)__builtin_ia32_vcvttps2qqs512_round_mask( \ + (__v8sf)(__m256)(A), (__v8di)_mm512_setzero_si512(), (__mmask8)(U), \ + (const int)(R))) + +// 512 bit : float -> ulong +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttsps_epu64(__m256 A) { + return ((__m512i)__builtin_ia32_vcvttps2uqqs512_round_mask( + (__v8sf)A, (__v8di)_mm512_undefined_epi32(), (__mmask8)-1, + _MM_FROUND_CUR_DIRECTION)); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_cvttsps_epu64(__m512i W, __mmask8 U, __m256 A) { + return ((__m512i)__builtin_ia32_vcvttps2uqqs512_round_mask( + (__v8sf)A, (__v8di)W, U, _MM_FROUND_CUR_DIRECTION)); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_cvttsps_epu64(__mmask8 U, __m256 A) { + return ((__m512i)__builtin_ia32_vcvttps2uqqs512_round_mask( + (__v8sf)A, (__v8di)_mm512_setzero_si512(), U, _MM_FROUND_CUR_DIRECTION)); +} + +#define _mm512_cvtts_roundps_epu64(A, R) \ + ((__m512i)__builtin_ia32_vcvttps2uqqs512_round_mask( \ + (__v8sf)(__m256)(A), (__v8di)_mm512_undefined_epi32(), (__mmask8) - 1, \ + (const int)(R))) + +#define _mm512_mask_cvtts_roundps_epu64(W, U, A, R) \ + ((__m512i)__builtin_ia32_vcvttps2uqqs512_round_mask( \ + (__v8sf)(__m256)(A), (__v8di)(__m512i)(W), (__mmask8)(U), \ + (const int)(R))) + +#define _mm512_maskz_cvtts_roundps_epu64(U, A, R) \ + ((__m512i)__builtin_ia32_vcvttps2uqqs512_round_mask( \ + (__v8sf)(__m256)(A), (__v8di)_mm512_setzero_si512(), (__mmask8)(U), \ + (const int)(R))) +#endif + +#undef __DEFAULT_FN_ATTRS +#endif // __AVX10_2_512SATCVTDSINTRIN_H diff --git a/clang/lib/Headers/avx10_2satcvtdsintrin.h b/clang/lib/Headers/avx10_2satcvtdsintrin.h new file mode 100644 index 00000000000000..5588c9ccfa4319 --- /dev/null +++ b/clang/lib/Headers/avx10_2satcvtdsintrin.h @@ -0,0 +1,453 @@ +/*===----------- avx10_2satcvtdsintrin.h - AVX512SATCVTDS intrinsics --------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __IMMINTRIN_H +#error \ + "Never use <avx10_2satcvtdsintrin.h> directly; include <immintrin.h> instead." +#endif // __IMMINTRIN_H + +#ifndef __AVX10_2SATCVTDSINTRIN_H +#define __AVX10_2SATCVTDSINTRIN_H + +/* Define the default attributes for the functions in this file. */ +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"), \ + __min_vector_width__(256))) + +#define _mm_cvtt_roundssd_i32(A, R) \ + ((int)__builtin_ia32_vcvttssd2si32((__v2df)(__m128)(A), (const int)(R))) + +#define _mm_cvtt_roundssd_si32(A, R) \ + ((int)__builtin_ia32_vcvttssd2si32... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/102592 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits