https://github.com/FreddyLeaf updated https://github.com/llvm/llvm-project/pull/84136
>From 2ba698b222fa1dc963d21850d5931562aa65533d Mon Sep 17 00:00:00 2001 From: Freddy Ye <freddy...@intel.com> Date: Tue, 26 Sep 2023 16:44:01 +0800 Subject: [PATCH 1/5] [X86] Change target of __builtin_ia32_cmp[p|s][s|d] from avx into sse/sse2 --- clang/include/clang/Basic/BuiltinsX86.def | 8 +- clang/lib/Headers/avxintrin.h | 247 ------------------ clang/lib/Headers/emmintrin.h | 121 +++++++++ clang/lib/Headers/xmmintrin.h | 128 +++++++++ clang/test/CodeGen/X86/avx-builtins.c | 96 ------- .../test/CodeGen/X86/cmp-avx-builtins-error.c | 22 ++ clang/test/CodeGen/X86/sse-builtins.c | 54 ++++ clang/test/CodeGen/X86/sse2-builtins.c | 54 ++++ clang/test/CodeGen/target-features-error-2.c | 4 +- 9 files changed, 385 insertions(+), 349 deletions(-) create mode 100644 clang/test/CodeGen/X86/cmp-avx-builtins-error.c diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def index 207cde0414b54e..eafcc219c10966 100644 --- a/clang/include/clang/Basic/BuiltinsX86.def +++ b/clang/include/clang/Basic/BuiltinsX86.def @@ -226,6 +226,8 @@ TARGET_BUILTIN(__builtin_ia32_minps, "V4fV4fV4f", "ncV:128:", "sse") TARGET_BUILTIN(__builtin_ia32_maxps, "V4fV4fV4f", "ncV:128:", "sse") TARGET_BUILTIN(__builtin_ia32_minss, "V4fV4fV4f", "ncV:128:", "sse") TARGET_BUILTIN(__builtin_ia32_maxss, "V4fV4fV4f", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_cmpps, "V4fV4fV4fIc", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_cmpss, "V4fV4fV4fIc", "ncV:128:", "sse") TARGET_BUILTIN(__builtin_ia32_cmpeqpd, "V2dV2dV2d", "ncV:128:", "sse2") TARGET_BUILTIN(__builtin_ia32_cmpltpd, "V2dV2dV2d", "ncV:128:", "sse2") @@ -243,6 +245,8 @@ TARGET_BUILTIN(__builtin_ia32_cmpneqsd, "V2dV2dV2d", "ncV:128:", "sse2") TARGET_BUILTIN(__builtin_ia32_cmpnltsd, "V2dV2dV2d", "ncV:128:", "sse2") TARGET_BUILTIN(__builtin_ia32_cmpnlesd, "V2dV2dV2d", "ncV:128:", "sse2") TARGET_BUILTIN(__builtin_ia32_cmpordsd, "V2dV2dV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_cmpsd, "V2dV2dV2dIc", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_cmppd, "V2dV2dV2dIc", "ncV:128:", "sse2") TARGET_BUILTIN(__builtin_ia32_minpd, "V2dV2dV2d", "ncV:128:", "sse2") TARGET_BUILTIN(__builtin_ia32_maxpd, "V2dV2dV2d", "ncV:128:", "sse2") TARGET_BUILTIN(__builtin_ia32_minsd, "V2dV2dV2d", "ncV:128:", "sse2") @@ -462,12 +466,8 @@ TARGET_BUILTIN(__builtin_ia32_blendvps256, "V8fV8fV8fV8f", "ncV:256:", "avx") TARGET_BUILTIN(__builtin_ia32_shufpd256, "V4dV4dV4dIi", "ncV:256:", "avx") TARGET_BUILTIN(__builtin_ia32_shufps256, "V8fV8fV8fIi", "ncV:256:", "avx") TARGET_BUILTIN(__builtin_ia32_dpps256, "V8fV8fV8fIc", "ncV:256:", "avx") -TARGET_BUILTIN(__builtin_ia32_cmppd, "V2dV2dV2dIc", "ncV:128:", "avx") TARGET_BUILTIN(__builtin_ia32_cmppd256, "V4dV4dV4dIc", "ncV:256:", "avx") -TARGET_BUILTIN(__builtin_ia32_cmpps, "V4fV4fV4fIc", "ncV:128:", "avx") TARGET_BUILTIN(__builtin_ia32_cmpps256, "V8fV8fV8fIc", "ncV:256:", "avx") -TARGET_BUILTIN(__builtin_ia32_cmpsd, "V2dV2dV2dIc", "ncV:128:", "avx") -TARGET_BUILTIN(__builtin_ia32_cmpss, "V4fV4fV4fIc", "ncV:128:", "avx") TARGET_BUILTIN(__builtin_ia32_vextractf128_pd256, "V2dV4dIi", "ncV:256:", "avx") TARGET_BUILTIN(__builtin_ia32_vextractf128_ps256, "V4fV8fIi", "ncV:256:", "avx") TARGET_BUILTIN(__builtin_ia32_vextractf128_si256, "V4iV8iIi", "ncV:256:", "avx") diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h index f116d8bc3a94c7..d6192518ea24ba 100644 --- a/clang/lib/Headers/avxintrin.h +++ b/clang/lib/Headers/avxintrin.h @@ -1573,15 +1573,6 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c) ((__m256d)__builtin_ia32_shufpd256((__v4df)(__m256d)(a), \ (__v4df)(__m256d)(b), (int)(mask))) -/* Compare */ -#define _CMP_EQ_OQ 0x00 /* Equal (ordered, non-signaling) */ -#define _CMP_LT_OS 0x01 /* Less-than (ordered, signaling) */ -#define _CMP_LE_OS 0x02 /* Less-than-or-equal (ordered, signaling) */ -#define _CMP_UNORD_Q 0x03 /* Unordered (non-signaling) */ -#define _CMP_NEQ_UQ 0x04 /* Not-equal (unordered, non-signaling) */ -#define _CMP_NLT_US 0x05 /* Not-less-than (unordered, signaling) */ -#define _CMP_NLE_US 0x06 /* Not-less-than-or-equal (unordered, signaling) */ -#define _CMP_ORD_Q 0x07 /* Ordered (non-signaling) */ #define _CMP_EQ_UQ 0x08 /* Equal (unordered, non-signaling) */ #define _CMP_NGE_US 0x09 /* Not-greater-than-or-equal (unordered, signaling) */ #define _CMP_NGT_US 0x0a /* Not-greater-than (unordered, signaling) */ @@ -1607,126 +1598,6 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c) #define _CMP_GT_OQ 0x1e /* Greater-than (ordered, non-signaling) */ #define _CMP_TRUE_US 0x1f /* True (unordered, signaling) */ -/// Compares each of the corresponding double-precision values of two -/// 128-bit vectors of [2 x double], using the operation specified by the -/// immediate integer operand. -/// -/// Returns a [2 x double] vector consisting of two doubles corresponding to -/// the two comparison results: zero if the comparison is false, and all 1's -/// if the comparison is true. -/// -/// \headerfile <x86intrin.h> -/// -/// \code -/// __m128d _mm_cmp_pd(__m128d a, __m128d b, const int c); -/// \endcode -/// -/// This intrinsic corresponds to the <c> VCMPPD </c> instruction. -/// -/// \param a -/// A 128-bit vector of [2 x double]. -/// \param b -/// A 128-bit vector of [2 x double]. -/// \param c -/// An immediate integer operand, with bits [4:0] specifying which comparison -/// operation to use: \n -/// 0x00: Equal (ordered, non-signaling) \n -/// 0x01: Less-than (ordered, signaling) \n -/// 0x02: Less-than-or-equal (ordered, signaling) \n -/// 0x03: Unordered (non-signaling) \n -/// 0x04: Not-equal (unordered, non-signaling) \n -/// 0x05: Not-less-than (unordered, signaling) \n -/// 0x06: Not-less-than-or-equal (unordered, signaling) \n -/// 0x07: Ordered (non-signaling) \n -/// 0x08: Equal (unordered, non-signaling) \n -/// 0x09: Not-greater-than-or-equal (unordered, signaling) \n -/// 0x0A: Not-greater-than (unordered, signaling) \n -/// 0x0B: False (ordered, non-signaling) \n -/// 0x0C: Not-equal (ordered, non-signaling) \n -/// 0x0D: Greater-than-or-equal (ordered, signaling) \n -/// 0x0E: Greater-than (ordered, signaling) \n -/// 0x0F: True (unordered, non-signaling) \n -/// 0x10: Equal (ordered, signaling) \n -/// 0x11: Less-than (ordered, non-signaling) \n -/// 0x12: Less-than-or-equal (ordered, non-signaling) \n -/// 0x13: Unordered (signaling) \n -/// 0x14: Not-equal (unordered, signaling) \n -/// 0x15: Not-less-than (unordered, non-signaling) \n -/// 0x16: Not-less-than-or-equal (unordered, non-signaling) \n -/// 0x17: Ordered (signaling) \n -/// 0x18: Equal (unordered, signaling) \n -/// 0x19: Not-greater-than-or-equal (unordered, non-signaling) \n -/// 0x1A: Not-greater-than (unordered, non-signaling) \n -/// 0x1B: False (ordered, signaling) \n -/// 0x1C: Not-equal (ordered, signaling) \n -/// 0x1D: Greater-than-or-equal (ordered, non-signaling) \n -/// 0x1E: Greater-than (ordered, non-signaling) \n -/// 0x1F: True (unordered, signaling) -/// \returns A 128-bit vector of [2 x double] containing the comparison results. -#define _mm_cmp_pd(a, b, c) \ - ((__m128d)__builtin_ia32_cmppd((__v2df)(__m128d)(a), \ - (__v2df)(__m128d)(b), (c))) - -/// Compares each of the corresponding values of two 128-bit vectors of -/// [4 x float], using the operation specified by the immediate integer -/// operand. -/// -/// Returns a [4 x float] vector consisting of four floats corresponding to -/// the four comparison results: zero if the comparison is false, and all 1's -/// if the comparison is true. -/// -/// \headerfile <x86intrin.h> -/// -/// \code -/// __m128 _mm_cmp_ps(__m128 a, __m128 b, const int c); -/// \endcode -/// -/// This intrinsic corresponds to the <c> VCMPPS </c> instruction. -/// -/// \param a -/// A 128-bit vector of [4 x float]. -/// \param b -/// A 128-bit vector of [4 x float]. -/// \param c -/// An immediate integer operand, with bits [4:0] specifying which comparison -/// operation to use: \n -/// 0x00: Equal (ordered, non-signaling) \n -/// 0x01: Less-than (ordered, signaling) \n -/// 0x02: Less-than-or-equal (ordered, signaling) \n -/// 0x03: Unordered (non-signaling) \n -/// 0x04: Not-equal (unordered, non-signaling) \n -/// 0x05: Not-less-than (unordered, signaling) \n -/// 0x06: Not-less-than-or-equal (unordered, signaling) \n -/// 0x07: Ordered (non-signaling) \n -/// 0x08: Equal (unordered, non-signaling) \n -/// 0x09: Not-greater-than-or-equal (unordered, signaling) \n -/// 0x0A: Not-greater-than (unordered, signaling) \n -/// 0x0B: False (ordered, non-signaling) \n -/// 0x0C: Not-equal (ordered, non-signaling) \n -/// 0x0D: Greater-than-or-equal (ordered, signaling) \n -/// 0x0E: Greater-than (ordered, signaling) \n -/// 0x0F: True (unordered, non-signaling) \n -/// 0x10: Equal (ordered, signaling) \n -/// 0x11: Less-than (ordered, non-signaling) \n -/// 0x12: Less-than-or-equal (ordered, non-signaling) \n -/// 0x13: Unordered (signaling) \n -/// 0x14: Not-equal (unordered, signaling) \n -/// 0x15: Not-less-than (unordered, non-signaling) \n -/// 0x16: Not-less-than-or-equal (unordered, non-signaling) \n -/// 0x17: Ordered (signaling) \n -/// 0x18: Equal (unordered, signaling) \n -/// 0x19: Not-greater-than-or-equal (unordered, non-signaling) \n -/// 0x1A: Not-greater-than (unordered, non-signaling) \n -/// 0x1B: False (ordered, signaling) \n -/// 0x1C: Not-equal (ordered, signaling) \n -/// 0x1D: Greater-than-or-equal (ordered, non-signaling) \n -/// 0x1E: Greater-than (ordered, non-signaling) \n -/// 0x1F: True (unordered, signaling) -/// \returns A 128-bit vector of [4 x float] containing the comparison results. -#define _mm_cmp_ps(a, b, c) \ - ((__m128)__builtin_ia32_cmpps((__v4sf)(__m128)(a), \ - (__v4sf)(__m128)(b), (c))) - /// Compares each of the corresponding double-precision values of two /// 256-bit vectors of [4 x double], using the operation specified by the /// immediate integer operand. @@ -1847,124 +1718,6 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c) ((__m256)__builtin_ia32_cmpps256((__v8sf)(__m256)(a), \ (__v8sf)(__m256)(b), (c))) -/// Compares each of the corresponding scalar double-precision values of -/// two 128-bit vectors of [2 x double], using the operation specified by the -/// immediate integer operand. -/// -/// If the result is true, all 64 bits of the destination vector are set; -/// otherwise they are cleared. -/// -/// \headerfile <x86intrin.h> -/// -/// \code -/// __m128d _mm_cmp_sd(__m128d a, __m128d b, const int c); -/// \endcode -/// -/// This intrinsic corresponds to the <c> VCMPSD </c> instruction. -/// -/// \param a -/// A 128-bit vector of [2 x double]. -/// \param b -/// A 128-bit vector of [2 x double]. -/// \param c -/// An immediate integer operand, with bits [4:0] specifying which comparison -/// operation to use: \n -/// 0x00: Equal (ordered, non-signaling) \n -/// 0x01: Less-than (ordered, signaling) \n -/// 0x02: Less-than-or-equal (ordered, signaling) \n -/// 0x03: Unordered (non-signaling) \n -/// 0x04: Not-equal (unordered, non-signaling) \n -/// 0x05: Not-less-than (unordered, signaling) \n -/// 0x06: Not-less-than-or-equal (unordered, signaling) \n -/// 0x07: Ordered (non-signaling) \n -/// 0x08: Equal (unordered, non-signaling) \n -/// 0x09: Not-greater-than-or-equal (unordered, signaling) \n -/// 0x0A: Not-greater-than (unordered, signaling) \n -/// 0x0B: False (ordered, non-signaling) \n -/// 0x0C: Not-equal (ordered, non-signaling) \n -/// 0x0D: Greater-than-or-equal (ordered, signaling) \n -/// 0x0E: Greater-than (ordered, signaling) \n -/// 0x0F: True (unordered, non-signaling) \n -/// 0x10: Equal (ordered, signaling) \n -/// 0x11: Less-than (ordered, non-signaling) \n -/// 0x12: Less-than-or-equal (ordered, non-signaling) \n -/// 0x13: Unordered (signaling) \n -/// 0x14: Not-equal (unordered, signaling) \n -/// 0x15: Not-less-than (unordered, non-signaling) \n -/// 0x16: Not-less-than-or-equal (unordered, non-signaling) \n -/// 0x17: Ordered (signaling) \n -/// 0x18: Equal (unordered, signaling) \n -/// 0x19: Not-greater-than-or-equal (unordered, non-signaling) \n -/// 0x1A: Not-greater-than (unordered, non-signaling) \n -/// 0x1B: False (ordered, signaling) \n -/// 0x1C: Not-equal (ordered, signaling) \n -/// 0x1D: Greater-than-or-equal (ordered, non-signaling) \n -/// 0x1E: Greater-than (ordered, non-signaling) \n -/// 0x1F: True (unordered, signaling) -/// \returns A 128-bit vector of [2 x double] containing the comparison results. -#define _mm_cmp_sd(a, b, c) \ - ((__m128d)__builtin_ia32_cmpsd((__v2df)(__m128d)(a), \ - (__v2df)(__m128d)(b), (c))) - -/// Compares each of the corresponding scalar values of two 128-bit -/// vectors of [4 x float], using the operation specified by the immediate -/// integer operand. -/// -/// If the result is true, all 32 bits of the destination vector are set; -/// otherwise they are cleared. -/// -/// \headerfile <x86intrin.h> -/// -/// \code -/// __m128 _mm_cmp_ss(__m128 a, __m128 b, const int c); -/// \endcode -/// -/// This intrinsic corresponds to the <c> VCMPSS </c> instruction. -/// -/// \param a -/// A 128-bit vector of [4 x float]. -/// \param b -/// A 128-bit vector of [4 x float]. -/// \param c -/// An immediate integer operand, with bits [4:0] specifying which comparison -/// operation to use: \n -/// 0x00: Equal (ordered, non-signaling) \n -/// 0x01: Less-than (ordered, signaling) \n -/// 0x02: Less-than-or-equal (ordered, signaling) \n -/// 0x03: Unordered (non-signaling) \n -/// 0x04: Not-equal (unordered, non-signaling) \n -/// 0x05: Not-less-than (unordered, signaling) \n -/// 0x06: Not-less-than-or-equal (unordered, signaling) \n -/// 0x07: Ordered (non-signaling) \n -/// 0x08: Equal (unordered, non-signaling) \n -/// 0x09: Not-greater-than-or-equal (unordered, signaling) \n -/// 0x0A: Not-greater-than (unordered, signaling) \n -/// 0x0B: False (ordered, non-signaling) \n -/// 0x0C: Not-equal (ordered, non-signaling) \n -/// 0x0D: Greater-than-or-equal (ordered, signaling) \n -/// 0x0E: Greater-than (ordered, signaling) \n -/// 0x0F: True (unordered, non-signaling) \n -/// 0x10: Equal (ordered, signaling) \n -/// 0x11: Less-than (ordered, non-signaling) \n -/// 0x12: Less-than-or-equal (ordered, non-signaling) \n -/// 0x13: Unordered (signaling) \n -/// 0x14: Not-equal (unordered, signaling) \n -/// 0x15: Not-less-than (unordered, non-signaling) \n -/// 0x16: Not-less-than-or-equal (unordered, non-signaling) \n -/// 0x17: Ordered (signaling) \n -/// 0x18: Equal (unordered, signaling) \n -/// 0x19: Not-greater-than-or-equal (unordered, non-signaling) \n -/// 0x1A: Not-greater-than (unordered, non-signaling) \n -/// 0x1B: False (ordered, signaling) \n -/// 0x1C: Not-equal (ordered, signaling) \n -/// 0x1D: Greater-than-or-equal (ordered, non-signaling) \n -/// 0x1E: Greater-than (ordered, non-signaling) \n -/// 0x1F: True (unordered, signaling) -/// \returns A 128-bit vector of [4 x float] containing the comparison results. -#define _mm_cmp_ss(a, b, c) \ - ((__m128)__builtin_ia32_cmpss((__v4sf)(__m128)(a), \ - (__v4sf)(__m128)(b), (c))) - /// Takes a [8 x i32] vector and returns the vector element value /// indexed by the immediate constant operand. /// diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h index 1d451b5f5b25de..db6d43ea4ad6fe 100644 --- a/clang/lib/Headers/emmintrin.h +++ b/clang/lib/Headers/emmintrin.h @@ -4745,6 +4745,127 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_castsi128_pd(__m128i __a) { return (__m128d)__a; } +/// Compares each of the corresponding double-precision values of two +/// 128-bit vectors of [2 x double], using the operation specified by the +/// immediate integer operand. +/// +/// Returns a [2 x double] vector consisting of two doubles corresponding to +/// the two comparison results: zero if the comparison is false, and all 1's +/// if the comparison is true. +/// +/// \headerfile <x86intrin.h> +/// +/// \code +/// __m128d _mm_cmp_pd(__m128d a, __m128d b, const int c); +/// \endcode +/// +/// This intrinsic corresponds to the <c> (V)CMPPD </c> instruction. +/// +/// \param a +/// A 128-bit vector of [2 x double]. +/// \param b +/// A 128-bit vector of [2 x double]. +/// \param c +/// An immediate integer operand, with bits [4:0] specifying which comparison +/// operation to use: \n +/// (Note that without avx enabled, only bits [2:0] are supported) \n +/// 0x00: Equal (ordered, non-signaling) \n +/// 0x01: Less-than (ordered, signaling) \n +/// 0x02: Less-than-or-equal (ordered, signaling) \n +/// 0x03: Unordered (non-signaling) \n +/// 0x04: Not-equal (unordered, non-signaling) \n +/// 0x05: Not-less-than (unordered, signaling) \n +/// 0x06: Not-less-than-or-equal (unordered, signaling) \n +/// 0x07: Ordered (non-signaling) \n +/// 0x08: Equal (unordered, non-signaling) \n +/// 0x09: Not-greater-than-or-equal (unordered, signaling) \n +/// 0x0A: Not-greater-than (unordered, signaling) \n +/// 0x0B: False (ordered, non-signaling) \n +/// 0x0C: Not-equal (ordered, non-signaling) \n +/// 0x0D: Greater-than-or-equal (ordered, signaling) \n +/// 0x0E: Greater-than (ordered, signaling) \n +/// 0x0F: True (unordered, non-signaling) \n +/// 0x10: Equal (ordered, signaling) \n +/// 0x11: Less-than (ordered, non-signaling) \n +/// 0x12: Less-than-or-equal (ordered, non-signaling) \n +/// 0x13: Unordered (signaling) \n +/// 0x14: Not-equal (unordered, signaling) \n +/// 0x15: Not-less-than (unordered, non-signaling) \n +/// 0x16: Not-less-than-or-equal (unordered, non-signaling) \n +/// 0x17: Ordered (signaling) \n +/// 0x18: Equal (unordered, signaling) \n +/// 0x19: Not-greater-than-or-equal (unordered, non-signaling) \n +/// 0x1A: Not-greater-than (unordered, non-signaling) \n +/// 0x1B: False (ordered, signaling) \n +/// 0x1C: Not-equal (ordered, signaling) \n +/// 0x1D: Greater-than-or-equal (ordered, non-signaling) \n +/// 0x1E: Greater-than (ordered, non-signaling) \n +/// 0x1F: True (unordered, signaling) +/// \returns A 128-bit vector of [2 x double] containing the comparison results. +#define _mm_cmp_pd(a, b, c) \ + ((__m128d)__builtin_ia32_cmppd((__v2df)(__m128d)(a), (__v2df)(__m128d)(b), \ + (c))) + +/// Compares each of the corresponding scalar double-precision values of +/// two 128-bit vectors of [2 x double], using the operation specified by the +/// immediate integer operand. +/// +/// If the result is true, all 64 bits of the destination vector are set; +/// otherwise they are cleared. +/// +/// \headerfile <x86intrin.h> +/// +/// \code +/// __m128d _mm_cmp_sd(__m128d a, __m128d b, const int c); +/// \endcode +/// +/// This intrinsic corresponds to the <c> (V)CMPSD </c> instruction. +/// +/// \param a +/// A 128-bit vector of [2 x double]. +/// \param b +/// A 128-bit vector of [2 x double]. +/// \param c +/// An immediate integer operand, with bits [4:0] specifying which comparison +/// operation to use: \n +/// (Note that without avx enabled, only bits [2:0] are supported) \n +/// 0x00: Equal (ordered, non-signaling) \n +/// 0x01: Less-than (ordered, signaling) \n +/// 0x02: Less-than-or-equal (ordered, signaling) \n +/// 0x03: Unordered (non-signaling) \n +/// 0x04: Not-equal (unordered, non-signaling) \n +/// 0x05: Not-less-than (unordered, signaling) \n +/// 0x06: Not-less-than-or-equal (unordered, signaling) \n +/// 0x07: Ordered (non-signaling) \n +/// 0x08: Equal (unordered, non-signaling) \n +/// 0x09: Not-greater-than-or-equal (unordered, signaling) \n +/// 0x0A: Not-greater-than (unordered, signaling) \n +/// 0x0B: False (ordered, non-signaling) \n +/// 0x0C: Not-equal (ordered, non-signaling) \n +/// 0x0D: Greater-than-or-equal (ordered, signaling) \n +/// 0x0E: Greater-than (ordered, signaling) \n +/// 0x0F: True (unordered, non-signaling) \n +/// 0x10: Equal (ordered, signaling) \n +/// 0x11: Less-than (ordered, non-signaling) \n +/// 0x12: Less-than-or-equal (ordered, non-signaling) \n +/// 0x13: Unordered (signaling) \n +/// 0x14: Not-equal (unordered, signaling) \n +/// 0x15: Not-less-than (unordered, non-signaling) \n +/// 0x16: Not-less-than-or-equal (unordered, non-signaling) \n +/// 0x17: Ordered (signaling) \n +/// 0x18: Equal (unordered, signaling) \n +/// 0x19: Not-greater-than-or-equal (unordered, non-signaling) \n +/// 0x1A: Not-greater-than (unordered, non-signaling) \n +/// 0x1B: False (ordered, signaling) \n +/// 0x1C: Not-equal (ordered, signaling) \n +/// 0x1D: Greater-than-or-equal (ordered, non-signaling) \n +/// 0x1E: Greater-than (ordered, non-signaling) \n +/// 0x1F: True (unordered, signaling) +/// \returns A 128-bit vector of [2 x double] containing the comparison results. +#define _mm_cmp_sd(a, b, c) \ + ((__m128d)__builtin_ia32_cmpsd((__v2df)(__m128d)(a), (__v2df)(__m128d)(b), \ + (c))) + #if defined(__cplusplus) extern "C" { #endif diff --git a/clang/lib/Headers/xmmintrin.h b/clang/lib/Headers/xmmintrin.h index 47368f3c23d2d6..f9894c23ce25bf 100644 --- a/clang/lib/Headers/xmmintrin.h +++ b/clang/lib/Headers/xmmintrin.h @@ -2940,6 +2940,134 @@ _mm_movemask_ps(__m128 __a) return __builtin_ia32_movmskps((__v4sf)__a); } +/* Compare */ +#define _CMP_EQ_OQ 0x00 /* Equal (ordered, non-signaling) */ +#define _CMP_LT_OS 0x01 /* Less-than (ordered, signaling) */ +#define _CMP_LE_OS 0x02 /* Less-than-or-equal (ordered, signaling) */ +#define _CMP_UNORD_Q 0x03 /* Unordered (non-signaling) */ +#define _CMP_NEQ_UQ 0x04 /* Not-equal (unordered, non-signaling) */ +#define _CMP_NLT_US 0x05 /* Not-less-than (unordered, signaling) */ +#define _CMP_NLE_US 0x06 /* Not-less-than-or-equal (unordered, signaling) */ +#define _CMP_ORD_Q 0x07 /* Ordered (non-signaling) */ + +/// Compares each of the corresponding values of two 128-bit vectors of +/// [4 x float], using the operation specified by the immediate integer +/// operand. +/// +/// Returns a [4 x float] vector consisting of four floats corresponding to +/// the four comparison results: zero if the comparison is false, and all 1's +/// if the comparison is true. +/// +/// \headerfile <x86intrin.h> +/// +/// \code +/// __m128 _mm_cmp_ps(__m128 a, __m128 b, const int c); +/// \endcode +/// +/// This intrinsic corresponds to the <c> (V)CMPPS </c> instruction. +/// +/// \param a +/// A 128-bit vector of [4 x float]. +/// \param b +/// A 128-bit vector of [4 x float]. +/// \param c +/// An immediate integer operand, with bits [4:0] specifying which comparison +/// operation to use: \n +/// (Note that without avx enabled, only bits [2:0] are supported) \n +/// 0x00: Equal (ordered, non-signaling) \n +/// 0x01: Less-than (ordered, signaling) \n +/// 0x02: Less-than-or-equal (ordered, signaling) \n +/// 0x03: Unordered (non-signaling) \n +/// 0x04: Not-equal (unordered, non-signaling) \n +/// 0x05: Not-less-than (unordered, signaling) \n +/// 0x06: Not-less-than-or-equal (unordered, signaling) \n +/// 0x07: Ordered (non-signaling) \n +/// 0x08: Equal (unordered, non-signaling) \n +/// 0x09: Not-greater-than-or-equal (unordered, signaling) \n +/// 0x0A: Not-greater-than (unordered, signaling) \n +/// 0x0B: False (ordered, non-signaling) \n +/// 0x0C: Not-equal (ordered, non-signaling) \n +/// 0x0D: Greater-than-or-equal (ordered, signaling) \n +/// 0x0E: Greater-than (ordered, signaling) \n +/// 0x0F: True (unordered, non-signaling) \n +/// 0x10: Equal (ordered, signaling) \n +/// 0x11: Less-than (ordered, non-signaling) \n +/// 0x12: Less-than-or-equal (ordered, non-signaling) \n +/// 0x13: Unordered (signaling) \n +/// 0x14: Not-equal (unordered, signaling) \n +/// 0x15: Not-less-than (unordered, non-signaling) \n +/// 0x16: Not-less-than-or-equal (unordered, non-signaling) \n +/// 0x17: Ordered (signaling) \n +/// 0x18: Equal (unordered, signaling) \n +/// 0x19: Not-greater-than-or-equal (unordered, non-signaling) \n +/// 0x1A: Not-greater-than (unordered, non-signaling) \n +/// 0x1B: False (ordered, signaling) \n +/// 0x1C: Not-equal (ordered, signaling) \n +/// 0x1D: Greater-than-or-equal (ordered, non-signaling) \n +/// 0x1E: Greater-than (ordered, non-signaling) \n +/// 0x1F: True (unordered, signaling) +/// \returns A 128-bit vector of [4 x float] containing the comparison results. +#define _mm_cmp_ps(a, b, c) \ + ((__m128)__builtin_ia32_cmpps((__v4sf)(__m128)(a), (__v4sf)(__m128)(b), (c))) + +/// Compares each of the corresponding scalar values of two 128-bit +/// vectors of [4 x float], using the operation specified by the immediate +/// integer operand. +/// +/// If the result is true, all 32 bits of the destination vector are set; +/// otherwise they are cleared. +/// +/// \headerfile <x86intrin.h> +/// +/// \code +/// __m128 _mm_cmp_ss(__m128 a, __m128 b, const int c); +/// \endcode +/// +/// This intrinsic corresponds to the <c> (V)CMPSS </c> instruction. +/// +/// \param a +/// A 128-bit vector of [4 x float]. +/// \param b +/// A 128-bit vector of [4 x float]. +/// \param c +/// An immediate integer operand, with bits [4:0] specifying which comparison +/// operation to use: \n +/// (Note that without avx enabled, only bits [2:0] are supported) \n +/// 0x00: Equal (ordered, non-signaling) \n +/// 0x01: Less-than (ordered, signaling) \n +/// 0x02: Less-than-or-equal (ordered, signaling) \n +/// 0x03: Unordered (non-signaling) \n +/// 0x04: Not-equal (unordered, non-signaling) \n +/// 0x05: Not-less-than (unordered, signaling) \n +/// 0x06: Not-less-than-or-equal (unordered, signaling) \n +/// 0x07: Ordered (non-signaling) \n +/// 0x08: Equal (unordered, non-signaling) \n +/// 0x09: Not-greater-than-or-equal (unordered, signaling) \n +/// 0x0A: Not-greater-than (unordered, signaling) \n +/// 0x0B: False (ordered, non-signaling) \n +/// 0x0C: Not-equal (ordered, non-signaling) \n +/// 0x0D: Greater-than-or-equal (ordered, signaling) \n +/// 0x0E: Greater-than (ordered, signaling) \n +/// 0x0F: True (unordered, non-signaling) \n +/// 0x10: Equal (ordered, signaling) \n +/// 0x11: Less-than (ordered, non-signaling) \n +/// 0x12: Less-than-or-equal (ordered, non-signaling) \n +/// 0x13: Unordered (signaling) \n +/// 0x14: Not-equal (unordered, signaling) \n +/// 0x15: Not-less-than (unordered, non-signaling) \n +/// 0x16: Not-less-than-or-equal (unordered, non-signaling) \n +/// 0x17: Ordered (signaling) \n +/// 0x18: Equal (unordered, signaling) \n +/// 0x19: Not-greater-than-or-equal (unordered, non-signaling) \n +/// 0x1A: Not-greater-than (unordered, non-signaling) \n +/// 0x1B: False (ordered, signaling) \n +/// 0x1C: Not-equal (ordered, signaling) \n +/// 0x1D: Greater-than-or-equal (ordered, non-signaling) \n +/// 0x1E: Greater-than (ordered, non-signaling) \n +/// 0x1F: True (unordered, signaling) +/// \returns A 128-bit vector of [4 x float] containing the comparison results. +#define _mm_cmp_ss(a, b, c) \ + ((__m128)__builtin_ia32_cmpss((__v4sf)(__m128)(a), (__v4sf)(__m128)(b), (c))) #define _MM_ALIGN16 __attribute__((aligned(16))) diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c index d50366c3a022cc..4bf1213d9fca97 100644 --- a/clang/test/CodeGen/X86/avx-builtins.c +++ b/clang/test/CodeGen/X86/avx-builtins.c @@ -596,54 +596,6 @@ __m256 test_mm256_cmp_ps_true_us(__m256 a, __m256 b) { return _mm256_cmp_ps(a, b, _CMP_TRUE_US); } -__m128d test_mm_cmp_pd_eq_oq(__m128d a, __m128d b) { - // CHECK-LABEL: test_mm_cmp_pd_eq_oq - // CHECK: fcmp oeq <2 x double> %{{.*}}, %{{.*}} - return _mm_cmp_pd(a, b, _CMP_EQ_OQ); -} - -__m128d test_mm_cmp_pd_lt_os(__m128d a, __m128d b) { - // CHECK-LABEL: test_mm_cmp_pd_lt_os - // CHECK: fcmp olt <2 x double> %{{.*}}, %{{.*}} - return _mm_cmp_pd(a, b, _CMP_LT_OS); -} - -__m128d test_mm_cmp_pd_le_os(__m128d a, __m128d b) { - // CHECK-LABEL: test_mm_cmp_pd_le_os - // CHECK: fcmp ole <2 x double> %{{.*}}, %{{.*}} - return _mm_cmp_pd(a, b, _CMP_LE_OS); -} - -__m128d test_mm_cmp_pd_unord_q(__m128d a, __m128d b) { - // CHECK-LABEL: test_mm_cmp_pd_unord_q - // CHECK: fcmp uno <2 x double> %{{.*}}, %{{.*}} - return _mm_cmp_pd(a, b, _CMP_UNORD_Q); -} - -__m128d test_mm_cmp_pd_neq_uq(__m128d a, __m128d b) { - // CHECK-LABEL: test_mm_cmp_pd_neq_uq - // CHECK: fcmp une <2 x double> %{{.*}}, %{{.*}} - return _mm_cmp_pd(a, b, _CMP_NEQ_UQ); -} - -__m128d test_mm_cmp_pd_nlt_us(__m128d a, __m128d b) { - // CHECK-LABEL: test_mm_cmp_pd_nlt_us - // CHECK: fcmp uge <2 x double> %{{.*}}, %{{.*}} - return _mm_cmp_pd(a, b, _CMP_NLT_US); -} - -__m128d test_mm_cmp_pd_nle_us(__m128d a, __m128d b) { - // CHECK-LABEL: test_mm_cmp_pd_nle_us - // CHECK: fcmp ugt <2 x double> %{{.*}}, %{{.*}} - return _mm_cmp_pd(a, b, _CMP_NLE_US); -} - -__m128d test_mm_cmp_pd_ord_q(__m128d a, __m128d b) { - // CHECK-LABEL: test_mm_cmp_pd_ord_q - // CHECK: fcmp ord <2 x double> %{{.*}}, %{{.*}} - return _mm_cmp_pd(a, b, _CMP_ORD_Q); -} - __m128d test_mm_cmp_pd_eq_uq(__m128d a, __m128d b) { // CHECK-LABEL: test_mm_cmp_pd_eq_uq // CHECK: fcmp ueq <2 x double> %{{.*}}, %{{.*}} @@ -788,54 +740,6 @@ __m128d test_mm_cmp_pd_true_us(__m128d a, __m128d b) { return _mm_cmp_pd(a, b, _CMP_TRUE_US); } -__m128 test_mm_cmp_ps_eq_oq(__m128 a, __m128 b) { - // CHECK-LABEL: test_mm_cmp_ps_eq_oq - // CHECK: fcmp oeq <4 x float> %{{.*}}, %{{.*}} - return _mm_cmp_ps(a, b, _CMP_EQ_OQ); -} - -__m128 test_mm_cmp_ps_lt_os(__m128 a, __m128 b) { - // CHECK-LABEL: test_mm_cmp_ps_lt_os - // CHECK: fcmp olt <4 x float> %{{.*}}, %{{.*}} - return _mm_cmp_ps(a, b, _CMP_LT_OS); -} - -__m128 test_mm_cmp_ps_le_os(__m128 a, __m128 b) { - // CHECK-LABEL: test_mm_cmp_ps_le_os - // CHECK: fcmp ole <4 x float> %{{.*}}, %{{.*}} - return _mm_cmp_ps(a, b, _CMP_LE_OS); -} - -__m128 test_mm_cmp_ps_unord_q(__m128 a, __m128 b) { - // CHECK-LABEL: test_mm_cmp_ps_unord_q - // CHECK: fcmp uno <4 x float> %{{.*}}, %{{.*}} - return _mm_cmp_ps(a, b, _CMP_UNORD_Q); -} - -__m128 test_mm_cmp_ps_neq_uq(__m128 a, __m128 b) { - // CHECK-LABEL: test_mm_cmp_ps_neq_uq - // CHECK: fcmp une <4 x float> %{{.*}}, %{{.*}} - return _mm_cmp_ps(a, b, _CMP_NEQ_UQ); -} - -__m128 test_mm_cmp_ps_nlt_us(__m128 a, __m128 b) { - // CHECK-LABEL: test_mm_cmp_ps_nlt_us - // CHECK: fcmp uge <4 x float> %{{.*}}, %{{.*}} - return _mm_cmp_ps(a, b, _CMP_NLT_US); -} - -__m128 test_mm_cmp_ps_nle_us(__m128 a, __m128 b) { - // CHECK-LABEL: test_mm_cmp_ps_nle_us - // CHECK: fcmp ugt <4 x float> %{{.*}}, %{{.*}} - return _mm_cmp_ps(a, b, _CMP_NLE_US); -} - -__m128 test_mm_cmp_ps_ord_q(__m128 a, __m128 b) { - // CHECK-LABEL: test_mm_cmp_ps_ord_q - // CHECK: fcmp ord <4 x float> %{{.*}}, %{{.*}} - return _mm_cmp_ps(a, b, _CMP_ORD_Q); -} - __m128 test_mm_cmp_ps_eq_uq(__m128 a, __m128 b) { // CHECK-LABEL: test_mm_cmp_ps_eq_uq // CHECK: fcmp ueq <4 x float> %{{.*}}, %{{.*}} diff --git a/clang/test/CodeGen/X86/cmp-avx-builtins-error.c b/clang/test/CodeGen/X86/cmp-avx-builtins-error.c new file mode 100644 index 00000000000000..2b35aa84492b64 --- /dev/null +++ b/clang/test/CodeGen/X86/cmp-avx-builtins-error.c @@ -0,0 +1,22 @@ +// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown \ +// RUN: -target-feature +avx -emit-llvm -fsyntax-only -verify +// RUN: %clang_cc1 %s -ffreestanding -triple=i386-unknown-unknown \ +// RUN: -target-feature +avx -emit-llvm -fsyntax-only -verify + +#include <immintrin.h> + +__m128d test_mm_cmp_pd(__m128d a, __m128d b) { + return _mm_cmp_pd(a, b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +} + +__m128d test_mm_cmp_sd(__m128d a, __m128d b) { + return _mm_cmp_sd(a, b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +} + +__m128 test_mm_cmp_ps(__m128 a, __m128 b) { + return _mm_cmp_pd(a, b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +} + +__m128 test_mm_cmp_ss(__m128 a, __m128 b) { + return _mm_cmp_sd(a, b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +} diff --git a/clang/test/CodeGen/X86/sse-builtins.c b/clang/test/CodeGen/X86/sse-builtins.c index 830081296e52fb..9494fa46d54aed 100644 --- a/clang/test/CodeGen/X86/sse-builtins.c +++ b/clang/test/CodeGen/X86/sse-builtins.c @@ -813,3 +813,57 @@ __m128 test_mm_xor_ps(__m128 A, __m128 B) { // CHECK: xor <4 x i32> return _mm_xor_ps(A, B); } + +__m128 test_mm_cmp_ps_eq_oq(__m128 a, __m128 b) { + // CHECK-LABEL: test_mm_cmp_ps_eq_oq + // CHECK: fcmp oeq <4 x float> %{{.*}}, %{{.*}} + return _mm_cmp_ps(a, b, _CMP_EQ_OQ); +} + +__m128 test_mm_cmp_ps_lt_os(__m128 a, __m128 b) { + // CHECK-LABEL: test_mm_cmp_ps_lt_os + // CHECK: fcmp olt <4 x float> %{{.*}}, %{{.*}} + return _mm_cmp_ps(a, b, _CMP_LT_OS); +} + +__m128 test_mm_cmp_ps_le_os(__m128 a, __m128 b) { + // CHECK-LABEL: test_mm_cmp_ps_le_os + // CHECK: fcmp ole <4 x float> %{{.*}}, %{{.*}} + return _mm_cmp_ps(a, b, _CMP_LE_OS); +} + +__m128 test_mm_cmp_ps_unord_q(__m128 a, __m128 b) { + // CHECK-LABEL: test_mm_cmp_ps_unord_q + // CHECK: fcmp uno <4 x float> %{{.*}}, %{{.*}} + return _mm_cmp_ps(a, b, _CMP_UNORD_Q); +} + +__m128 test_mm_cmp_ps_neq_uq(__m128 a, __m128 b) { + // CHECK-LABEL: test_mm_cmp_ps_neq_uq + // CHECK: fcmp une <4 x float> %{{.*}}, %{{.*}} + return _mm_cmp_ps(a, b, _CMP_NEQ_UQ); +} + +__m128 test_mm_cmp_ps_nlt_us(__m128 a, __m128 b) { + // CHECK-LABEL: test_mm_cmp_ps_nlt_us + // CHECK: fcmp uge <4 x float> %{{.*}}, %{{.*}} + return _mm_cmp_ps(a, b, _CMP_NLT_US); +} + +__m128 test_mm_cmp_ps_nle_us(__m128 a, __m128 b) { + // CHECK-LABEL: test_mm_cmp_ps_nle_us + // CHECK: fcmp ugt <4 x float> %{{.*}}, %{{.*}} + return _mm_cmp_ps(a, b, _CMP_NLE_US); +} + +__m128 test_mm_cmp_ps_ord_q(__m128 a, __m128 b) { + // CHECK-LABEL: test_mm_cmp_ps_ord_q + // CHECK: fcmp ord <4 x float> %{{.*}}, %{{.*}} + return _mm_cmp_ps(a, b, _CMP_ORD_Q); +} + +__m128 test_mm_cmp_ss(__m128 A, __m128 B) { + // CHECK-LABEL: test_mm_cmp_ss + // CHECK: call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 7) + return _mm_cmp_ss(A, B, _CMP_ORD_Q); +} diff --git a/clang/test/CodeGen/X86/sse2-builtins.c b/clang/test/CodeGen/X86/sse2-builtins.c index 44b2e485a5bd85..1189fb964dff07 100644 --- a/clang/test/CodeGen/X86/sse2-builtins.c +++ b/clang/test/CodeGen/X86/sse2-builtins.c @@ -1719,3 +1719,57 @@ __m128i test_mm_xor_si128(__m128i A, __m128i B) { // CHECK: xor <2 x i64> %{{.*}}, %{{.*}} return _mm_xor_si128(A, B); } + +__m128d test_mm_cmp_pd_eq_oq(__m128d a, __m128d b) { + // CHECK-LABEL: test_mm_cmp_pd_eq_oq + // CHECK: fcmp oeq <2 x double> %{{.*}}, %{{.*}} + return _mm_cmp_pd(a, b, _CMP_EQ_OQ); +} + +__m128d test_mm_cmp_pd_lt_os(__m128d a, __m128d b) { + // CHECK-LABEL: test_mm_cmp_pd_lt_os + // CHECK: fcmp olt <2 x double> %{{.*}}, %{{.*}} + return _mm_cmp_pd(a, b, _CMP_LT_OS); +} + +__m128d test_mm_cmp_pd_le_os(__m128d a, __m128d b) { + // CHECK-LABEL: test_mm_cmp_pd_le_os + // CHECK: fcmp ole <2 x double> %{{.*}}, %{{.*}} + return _mm_cmp_pd(a, b, _CMP_LE_OS); +} + +__m128d test_mm_cmp_pd_unord_q(__m128d a, __m128d b) { + // CHECK-LABEL: test_mm_cmp_pd_unord_q + // CHECK: fcmp uno <2 x double> %{{.*}}, %{{.*}} + return _mm_cmp_pd(a, b, _CMP_UNORD_Q); +} + +__m128d test_mm_cmp_pd_neq_uq(__m128d a, __m128d b) { + // CHECK-LABEL: test_mm_cmp_pd_neq_uq + // CHECK: fcmp une <2 x double> %{{.*}}, %{{.*}} + return _mm_cmp_pd(a, b, _CMP_NEQ_UQ); +} + +__m128d test_mm_cmp_pd_nlt_us(__m128d a, __m128d b) { + // CHECK-LABEL: test_mm_cmp_pd_nlt_us + // CHECK: fcmp uge <2 x double> %{{.*}}, %{{.*}} + return _mm_cmp_pd(a, b, _CMP_NLT_US); +} + +__m128d test_mm_cmp_pd_nle_us(__m128d a, __m128d b) { + // CHECK-LABEL: test_mm_cmp_pd_nle_us + // CHECK: fcmp ugt <2 x double> %{{.*}}, %{{.*}} + return _mm_cmp_pd(a, b, _CMP_NLE_US); +} + +__m128d test_mm_cmp_pd_ord_q(__m128d a, __m128d b) { + // CHECK-LABEL: test_mm_cmp_pd_ord_q + // CHECK: fcmp ord <2 x double> %{{.*}}, %{{.*}} + return _mm_cmp_pd(a, b, _CMP_ORD_Q); +} + +__m128d test_mm_cmp_sd(__m128d A, __m128d B) { + // CHECK-LABEL: test_mm_cmp_sd + // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 7) + return _mm_cmp_sd(A, B, _CMP_ORD_Q); +} diff --git a/clang/test/CodeGen/target-features-error-2.c b/clang/test/CodeGen/target-features-error-2.c index 60586fb57f1c04..4f8bc8712aa51d 100644 --- a/clang/test/CodeGen/target-features-error-2.c +++ b/clang/test/CodeGen/target-features-error-2.c @@ -14,8 +14,8 @@ int baz(__m256i a) { #endif #if NEED_AVX_2 -__m128 need_avx(__m128 a, __m128 b) { - return _mm_cmp_ps(a, b, 0); // expected-error {{'__builtin_ia32_cmpps' needs target feature avx}} +__m256 need_avx(__m256 a, __m256 b) { + return _mm256_cmp_ps(a, b, 0); // expected-error {{'__builtin_ia32_cmpps256' needs target feature avx}} } #endif >From 54c78b5ad694946186ba115576a6eea9c8992349 Mon Sep 17 00:00:00 2001 From: Freddy Ye <freddy...@intel.com> Date: Mon, 26 Feb 2024 09:51:07 +0800 Subject: [PATCH 2/5] Handle attribute/target --- clang/include/clang/Basic/BuiltinsX86.def | 8 ++++---- clang/lib/CodeGen/CodeGenFunction.cpp | 19 +++++++++++++++++++ .../CodeGen/X86/attribute-cmpsd-no-error.c | 11 +++++++++++ clang/test/CodeGen/target-features-error-2.c | 4 ++-- 4 files changed, 36 insertions(+), 6 deletions(-) create mode 100644 clang/test/CodeGen/X86/attribute-cmpsd-no-error.c diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def index eafcc219c10966..d141d51a509e8d 100644 --- a/clang/include/clang/Basic/BuiltinsX86.def +++ b/clang/include/clang/Basic/BuiltinsX86.def @@ -226,8 +226,6 @@ TARGET_BUILTIN(__builtin_ia32_minps, "V4fV4fV4f", "ncV:128:", "sse") TARGET_BUILTIN(__builtin_ia32_maxps, "V4fV4fV4f", "ncV:128:", "sse") TARGET_BUILTIN(__builtin_ia32_minss, "V4fV4fV4f", "ncV:128:", "sse") TARGET_BUILTIN(__builtin_ia32_maxss, "V4fV4fV4f", "ncV:128:", "sse") -TARGET_BUILTIN(__builtin_ia32_cmpps, "V4fV4fV4fIc", "ncV:128:", "sse") -TARGET_BUILTIN(__builtin_ia32_cmpss, "V4fV4fV4fIc", "ncV:128:", "sse") TARGET_BUILTIN(__builtin_ia32_cmpeqpd, "V2dV2dV2d", "ncV:128:", "sse2") TARGET_BUILTIN(__builtin_ia32_cmpltpd, "V2dV2dV2d", "ncV:128:", "sse2") @@ -245,8 +243,6 @@ TARGET_BUILTIN(__builtin_ia32_cmpneqsd, "V2dV2dV2d", "ncV:128:", "sse2") TARGET_BUILTIN(__builtin_ia32_cmpnltsd, "V2dV2dV2d", "ncV:128:", "sse2") TARGET_BUILTIN(__builtin_ia32_cmpnlesd, "V2dV2dV2d", "ncV:128:", "sse2") TARGET_BUILTIN(__builtin_ia32_cmpordsd, "V2dV2dV2d", "ncV:128:", "sse2") -TARGET_BUILTIN(__builtin_ia32_cmpsd, "V2dV2dV2dIc", "ncV:128:", "sse2") -TARGET_BUILTIN(__builtin_ia32_cmppd, "V2dV2dV2dIc", "ncV:128:", "sse2") TARGET_BUILTIN(__builtin_ia32_minpd, "V2dV2dV2d", "ncV:128:", "sse2") TARGET_BUILTIN(__builtin_ia32_maxpd, "V2dV2dV2d", "ncV:128:", "sse2") TARGET_BUILTIN(__builtin_ia32_minsd, "V2dV2dV2d", "ncV:128:", "sse2") @@ -466,8 +462,12 @@ TARGET_BUILTIN(__builtin_ia32_blendvps256, "V8fV8fV8fV8f", "ncV:256:", "avx") TARGET_BUILTIN(__builtin_ia32_shufpd256, "V4dV4dV4dIi", "ncV:256:", "avx") TARGET_BUILTIN(__builtin_ia32_shufps256, "V8fV8fV8fIi", "ncV:256:", "avx") TARGET_BUILTIN(__builtin_ia32_dpps256, "V8fV8fV8fIc", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_cmppd, "V2dV2dV2dIc", "ncV:128:", "avx|sse2") TARGET_BUILTIN(__builtin_ia32_cmppd256, "V4dV4dV4dIc", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_cmpps, "V4fV4fV4fIc", "ncV:128:", "avx|sse") TARGET_BUILTIN(__builtin_ia32_cmpps256, "V8fV8fV8fIc", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_cmpsd, "V2dV2dV2dIc", "ncV:128:", "avx|sse2") +TARGET_BUILTIN(__builtin_ia32_cmpss, "V4fV4fV4fIc", "ncV:128:", "avx|sse") TARGET_BUILTIN(__builtin_ia32_vextractf128_pd256, "V2dV4dIi", "ncV:256:", "avx") TARGET_BUILTIN(__builtin_ia32_vextractf128_ps256, "V4fV8fIi", "ncV:256:", "avx") TARGET_BUILTIN(__builtin_ia32_vextractf128_si256, "V4iV8iIi", "ncV:256:", "avx") diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp index b87fc86f4e635f..f6544452ffb7c1 100644 --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -31,6 +31,7 @@ #include "clang/AST/StmtObjC.h" #include "clang/Basic/Builtins.h" #include "clang/Basic/CodeGenOptions.h" +#include "clang/Basic/TargetBuiltins.h" #include "clang/Basic/TargetInfo.h" #include "clang/CodeGen/CGFunctionInfo.h" #include "clang/Frontend/FrontendDiagnostic.h" @@ -2613,6 +2614,24 @@ void CGBuilderInserter::InsertHelper( // called function. void CodeGenFunction::checkTargetFeatures(const CallExpr *E, const FunctionDecl *TargetDecl) { + // SemaCheking cannot handle below x86 builtins because they have different + // parameter ranges with different TargetAttribute of caller. + if (CGM.getContext().getTargetInfo().getTriple().isX86()) { + unsigned BuiltinID = TargetDecl->getBuiltinID(); + if (BuiltinID == X86::BI__builtin_ia32_cmpps || + BuiltinID == X86::BI__builtin_ia32_cmpss || + BuiltinID == X86::BI__builtin_ia32_cmppd || + BuiltinID == X86::BI__builtin_ia32_cmpsd) { + const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(CurCodeDecl); + llvm::StringMap<bool> TargetFetureMap; + CGM.getContext().getFunctionFeatureMap(TargetFetureMap, FD); + llvm::APSInt Result = + *(E->getArg(2)->getIntegerConstantExpr(CGM.getContext())); + if (Result.getSExtValue() > 7 && !TargetFetureMap.lookup("avx")) + CGM.getDiags().Report(E->getBeginLoc(), diag::err_builtin_needs_feature) + << TargetDecl->getDeclName() << "avx"; + } + } return checkTargetFeatures(E->getBeginLoc(), TargetDecl); } diff --git a/clang/test/CodeGen/X86/attribute-cmpsd-no-error.c b/clang/test/CodeGen/X86/attribute-cmpsd-no-error.c new file mode 100644 index 00000000000000..a73d998155d096 --- /dev/null +++ b/clang/test/CodeGen/X86/attribute-cmpsd-no-error.c @@ -0,0 +1,11 @@ +// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown-emit-llvm -o /dev/null -verify +// RUN: %clang_cc1 %s -ffreestanding -triple=i386-unknown-unknown-emit-llvm -o /dev/null -verify + +// expected-no-diagnostics + +#include <immintrin.h> + +__attribute__((target("avx"))) +__m128 test(__m128 a, __m128 b) { + return _mm_cmp_ps(a, b, 14); +} diff --git a/clang/test/CodeGen/target-features-error-2.c b/clang/test/CodeGen/target-features-error-2.c index 4f8bc8712aa51d..1599b0135b747f 100644 --- a/clang/test/CodeGen/target-features-error-2.c +++ b/clang/test/CodeGen/target-features-error-2.c @@ -14,8 +14,8 @@ int baz(__m256i a) { #endif #if NEED_AVX_2 -__m256 need_avx(__m256 a, __m256 b) { - return _mm256_cmp_ps(a, b, 0); // expected-error {{'__builtin_ia32_cmpps256' needs target feature avx}} +__m128 need_avx(__m128 a, __m128 b) { + return _mm_cmp_ps(a, b, 8); // expected-error {{'__builtin_ia32_cmpps' needs target feature avx}} } #endif >From d8979e29e1932b42bb54424e1c6aa97ba9ba07e7 Mon Sep 17 00:00:00 2001 From: Freddy Ye <freddy...@intel.com> Date: Wed, 6 Mar 2024 20:12:10 +0800 Subject: [PATCH 3/5] Address comments. --- clang/include/clang/Basic/BuiltinsX86.def | 8 +- clang/lib/CodeGen/CodeGenFunction.cpp | 2 +- clang/test/CodeGen/X86/sse-builtins.c | 108 +++++++++++----------- clang/test/CodeGen/X86/sse2-builtins.c | 108 +++++++++++----------- 4 files changed, 113 insertions(+), 113 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def index d141d51a509e8d..eafcc219c10966 100644 --- a/clang/include/clang/Basic/BuiltinsX86.def +++ b/clang/include/clang/Basic/BuiltinsX86.def @@ -226,6 +226,8 @@ TARGET_BUILTIN(__builtin_ia32_minps, "V4fV4fV4f", "ncV:128:", "sse") TARGET_BUILTIN(__builtin_ia32_maxps, "V4fV4fV4f", "ncV:128:", "sse") TARGET_BUILTIN(__builtin_ia32_minss, "V4fV4fV4f", "ncV:128:", "sse") TARGET_BUILTIN(__builtin_ia32_maxss, "V4fV4fV4f", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_cmpps, "V4fV4fV4fIc", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_cmpss, "V4fV4fV4fIc", "ncV:128:", "sse") TARGET_BUILTIN(__builtin_ia32_cmpeqpd, "V2dV2dV2d", "ncV:128:", "sse2") TARGET_BUILTIN(__builtin_ia32_cmpltpd, "V2dV2dV2d", "ncV:128:", "sse2") @@ -243,6 +245,8 @@ TARGET_BUILTIN(__builtin_ia32_cmpneqsd, "V2dV2dV2d", "ncV:128:", "sse2") TARGET_BUILTIN(__builtin_ia32_cmpnltsd, "V2dV2dV2d", "ncV:128:", "sse2") TARGET_BUILTIN(__builtin_ia32_cmpnlesd, "V2dV2dV2d", "ncV:128:", "sse2") TARGET_BUILTIN(__builtin_ia32_cmpordsd, "V2dV2dV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_cmpsd, "V2dV2dV2dIc", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_cmppd, "V2dV2dV2dIc", "ncV:128:", "sse2") TARGET_BUILTIN(__builtin_ia32_minpd, "V2dV2dV2d", "ncV:128:", "sse2") TARGET_BUILTIN(__builtin_ia32_maxpd, "V2dV2dV2d", "ncV:128:", "sse2") TARGET_BUILTIN(__builtin_ia32_minsd, "V2dV2dV2d", "ncV:128:", "sse2") @@ -462,12 +466,8 @@ TARGET_BUILTIN(__builtin_ia32_blendvps256, "V8fV8fV8fV8f", "ncV:256:", "avx") TARGET_BUILTIN(__builtin_ia32_shufpd256, "V4dV4dV4dIi", "ncV:256:", "avx") TARGET_BUILTIN(__builtin_ia32_shufps256, "V8fV8fV8fIi", "ncV:256:", "avx") TARGET_BUILTIN(__builtin_ia32_dpps256, "V8fV8fV8fIc", "ncV:256:", "avx") -TARGET_BUILTIN(__builtin_ia32_cmppd, "V2dV2dV2dIc", "ncV:128:", "avx|sse2") TARGET_BUILTIN(__builtin_ia32_cmppd256, "V4dV4dV4dIc", "ncV:256:", "avx") -TARGET_BUILTIN(__builtin_ia32_cmpps, "V4fV4fV4fIc", "ncV:128:", "avx|sse") TARGET_BUILTIN(__builtin_ia32_cmpps256, "V8fV8fV8fIc", "ncV:256:", "avx") -TARGET_BUILTIN(__builtin_ia32_cmpsd, "V2dV2dV2dIc", "ncV:128:", "avx|sse2") -TARGET_BUILTIN(__builtin_ia32_cmpss, "V4fV4fV4fIc", "ncV:128:", "avx|sse") TARGET_BUILTIN(__builtin_ia32_vextractf128_pd256, "V2dV4dIi", "ncV:256:", "avx") TARGET_BUILTIN(__builtin_ia32_vextractf128_ps256, "V4fV8fIi", "ncV:256:", "avx") TARGET_BUILTIN(__builtin_ia32_vextractf128_si256, "V4iV8iIi", "ncV:256:", "avx") diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp index f6544452ffb7c1..4a3ff49b0007a3 100644 --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -2614,7 +2614,7 @@ void CGBuilderInserter::InsertHelper( // called function. void CodeGenFunction::checkTargetFeatures(const CallExpr *E, const FunctionDecl *TargetDecl) { - // SemaCheking cannot handle below x86 builtins because they have different + // SemaChecking cannot handle below x86 builtins because they have different // parameter ranges with different TargetAttribute of caller. if (CGM.getContext().getTargetInfo().getTriple().isX86()) { unsigned BuiltinID = TargetDecl->getBuiltinID(); diff --git a/clang/test/CodeGen/X86/sse-builtins.c b/clang/test/CodeGen/X86/sse-builtins.c index 9494fa46d54aed..aae5cfb8bb1d99 100644 --- a/clang/test/CodeGen/X86/sse-builtins.c +++ b/clang/test/CodeGen/X86/sse-builtins.c @@ -34,6 +34,60 @@ __m128 test_mm_andnot_ps(__m128 A, __m128 B) { return _mm_andnot_ps(A, B); } +__m128 test_mm_cmp_ps_eq_oq(__m128 a, __m128 b) { + // CHECK-LABEL: test_mm_cmp_ps_eq_oq + // CHECK: fcmp oeq <4 x float> %{{.*}}, %{{.*}} + return _mm_cmp_ps(a, b, _CMP_EQ_OQ); +} + +__m128 test_mm_cmp_ps_lt_os(__m128 a, __m128 b) { + // CHECK-LABEL: test_mm_cmp_ps_lt_os + // CHECK: fcmp olt <4 x float> %{{.*}}, %{{.*}} + return _mm_cmp_ps(a, b, _CMP_LT_OS); +} + +__m128 test_mm_cmp_ps_le_os(__m128 a, __m128 b) { + // CHECK-LABEL: test_mm_cmp_ps_le_os + // CHECK: fcmp ole <4 x float> %{{.*}}, %{{.*}} + return _mm_cmp_ps(a, b, _CMP_LE_OS); +} + +__m128 test_mm_cmp_ps_unord_q(__m128 a, __m128 b) { + // CHECK-LABEL: test_mm_cmp_ps_unord_q + // CHECK: fcmp uno <4 x float> %{{.*}}, %{{.*}} + return _mm_cmp_ps(a, b, _CMP_UNORD_Q); +} + +__m128 test_mm_cmp_ps_neq_uq(__m128 a, __m128 b) { + // CHECK-LABEL: test_mm_cmp_ps_neq_uq + // CHECK: fcmp une <4 x float> %{{.*}}, %{{.*}} + return _mm_cmp_ps(a, b, _CMP_NEQ_UQ); +} + +__m128 test_mm_cmp_ps_nlt_us(__m128 a, __m128 b) { + // CHECK-LABEL: test_mm_cmp_ps_nlt_us + // CHECK: fcmp uge <4 x float> %{{.*}}, %{{.*}} + return _mm_cmp_ps(a, b, _CMP_NLT_US); +} + +__m128 test_mm_cmp_ps_nle_us(__m128 a, __m128 b) { + // CHECK-LABEL: test_mm_cmp_ps_nle_us + // CHECK: fcmp ugt <4 x float> %{{.*}}, %{{.*}} + return _mm_cmp_ps(a, b, _CMP_NLE_US); +} + +__m128 test_mm_cmp_ps_ord_q(__m128 a, __m128 b) { + // CHECK-LABEL: test_mm_cmp_ps_ord_q + // CHECK: fcmp ord <4 x float> %{{.*}}, %{{.*}} + return _mm_cmp_ps(a, b, _CMP_ORD_Q); +} + +__m128 test_mm_cmp_ss(__m128 A, __m128 B) { + // CHECK-LABEL: test_mm_cmp_ss + // CHECK: call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 7) + return _mm_cmp_ss(A, B, _CMP_ORD_Q); +} + __m128 test_mm_cmpeq_ps(__m128 __a, __m128 __b) { // CHECK-LABEL: test_mm_cmpeq_ps // CHECK: [[CMP:%.*]] = fcmp oeq <4 x float> @@ -813,57 +867,3 @@ __m128 test_mm_xor_ps(__m128 A, __m128 B) { // CHECK: xor <4 x i32> return _mm_xor_ps(A, B); } - -__m128 test_mm_cmp_ps_eq_oq(__m128 a, __m128 b) { - // CHECK-LABEL: test_mm_cmp_ps_eq_oq - // CHECK: fcmp oeq <4 x float> %{{.*}}, %{{.*}} - return _mm_cmp_ps(a, b, _CMP_EQ_OQ); -} - -__m128 test_mm_cmp_ps_lt_os(__m128 a, __m128 b) { - // CHECK-LABEL: test_mm_cmp_ps_lt_os - // CHECK: fcmp olt <4 x float> %{{.*}}, %{{.*}} - return _mm_cmp_ps(a, b, _CMP_LT_OS); -} - -__m128 test_mm_cmp_ps_le_os(__m128 a, __m128 b) { - // CHECK-LABEL: test_mm_cmp_ps_le_os - // CHECK: fcmp ole <4 x float> %{{.*}}, %{{.*}} - return _mm_cmp_ps(a, b, _CMP_LE_OS); -} - -__m128 test_mm_cmp_ps_unord_q(__m128 a, __m128 b) { - // CHECK-LABEL: test_mm_cmp_ps_unord_q - // CHECK: fcmp uno <4 x float> %{{.*}}, %{{.*}} - return _mm_cmp_ps(a, b, _CMP_UNORD_Q); -} - -__m128 test_mm_cmp_ps_neq_uq(__m128 a, __m128 b) { - // CHECK-LABEL: test_mm_cmp_ps_neq_uq - // CHECK: fcmp une <4 x float> %{{.*}}, %{{.*}} - return _mm_cmp_ps(a, b, _CMP_NEQ_UQ); -} - -__m128 test_mm_cmp_ps_nlt_us(__m128 a, __m128 b) { - // CHECK-LABEL: test_mm_cmp_ps_nlt_us - // CHECK: fcmp uge <4 x float> %{{.*}}, %{{.*}} - return _mm_cmp_ps(a, b, _CMP_NLT_US); -} - -__m128 test_mm_cmp_ps_nle_us(__m128 a, __m128 b) { - // CHECK-LABEL: test_mm_cmp_ps_nle_us - // CHECK: fcmp ugt <4 x float> %{{.*}}, %{{.*}} - return _mm_cmp_ps(a, b, _CMP_NLE_US); -} - -__m128 test_mm_cmp_ps_ord_q(__m128 a, __m128 b) { - // CHECK-LABEL: test_mm_cmp_ps_ord_q - // CHECK: fcmp ord <4 x float> %{{.*}}, %{{.*}} - return _mm_cmp_ps(a, b, _CMP_ORD_Q); -} - -__m128 test_mm_cmp_ss(__m128 A, __m128 B) { - // CHECK-LABEL: test_mm_cmp_ss - // CHECK: call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 7) - return _mm_cmp_ss(A, B, _CMP_ORD_Q); -} diff --git a/clang/test/CodeGen/X86/sse2-builtins.c b/clang/test/CodeGen/X86/sse2-builtins.c index 1189fb964dff07..3ca7777e763359 100644 --- a/clang/test/CodeGen/X86/sse2-builtins.c +++ b/clang/test/CodeGen/X86/sse2-builtins.c @@ -160,6 +160,60 @@ void test_mm_clflush(void* A) { _mm_clflush(A); } +__m128d test_mm_cmp_pd_eq_oq(__m128d a, __m128d b) { + // CHECK-LABEL: test_mm_cmp_pd_eq_oq + // CHECK: fcmp oeq <2 x double> %{{.*}}, %{{.*}} + return _mm_cmp_pd(a, b, _CMP_EQ_OQ); +} + +__m128d test_mm_cmp_pd_lt_os(__m128d a, __m128d b) { + // CHECK-LABEL: test_mm_cmp_pd_lt_os + // CHECK: fcmp olt <2 x double> %{{.*}}, %{{.*}} + return _mm_cmp_pd(a, b, _CMP_LT_OS); +} + +__m128d test_mm_cmp_pd_le_os(__m128d a, __m128d b) { + // CHECK-LABEL: test_mm_cmp_pd_le_os + // CHECK: fcmp ole <2 x double> %{{.*}}, %{{.*}} + return _mm_cmp_pd(a, b, _CMP_LE_OS); +} + +__m128d test_mm_cmp_pd_unord_q(__m128d a, __m128d b) { + // CHECK-LABEL: test_mm_cmp_pd_unord_q + // CHECK: fcmp uno <2 x double> %{{.*}}, %{{.*}} + return _mm_cmp_pd(a, b, _CMP_UNORD_Q); +} + +__m128d test_mm_cmp_pd_neq_uq(__m128d a, __m128d b) { + // CHECK-LABEL: test_mm_cmp_pd_neq_uq + // CHECK: fcmp une <2 x double> %{{.*}}, %{{.*}} + return _mm_cmp_pd(a, b, _CMP_NEQ_UQ); +} + +__m128d test_mm_cmp_pd_nlt_us(__m128d a, __m128d b) { + // CHECK-LABEL: test_mm_cmp_pd_nlt_us + // CHECK: fcmp uge <2 x double> %{{.*}}, %{{.*}} + return _mm_cmp_pd(a, b, _CMP_NLT_US); +} + +__m128d test_mm_cmp_pd_nle_us(__m128d a, __m128d b) { + // CHECK-LABEL: test_mm_cmp_pd_nle_us + // CHECK: fcmp ugt <2 x double> %{{.*}}, %{{.*}} + return _mm_cmp_pd(a, b, _CMP_NLE_US); +} + +__m128d test_mm_cmp_pd_ord_q(__m128d a, __m128d b) { + // CHECK-LABEL: test_mm_cmp_pd_ord_q + // CHECK: fcmp ord <2 x double> %{{.*}}, %{{.*}} + return _mm_cmp_pd(a, b, _CMP_ORD_Q); +} + +__m128d test_mm_cmp_sd(__m128d A, __m128d B) { + // CHECK-LABEL: test_mm_cmp_sd + // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 7) + return _mm_cmp_sd(A, B, _CMP_ORD_Q); +} + __m128i test_mm_cmpeq_epi8(__m128i A, __m128i B) { // CHECK-LABEL: test_mm_cmpeq_epi8 // CHECK: icmp eq <16 x i8> @@ -1719,57 +1773,3 @@ __m128i test_mm_xor_si128(__m128i A, __m128i B) { // CHECK: xor <2 x i64> %{{.*}}, %{{.*}} return _mm_xor_si128(A, B); } - -__m128d test_mm_cmp_pd_eq_oq(__m128d a, __m128d b) { - // CHECK-LABEL: test_mm_cmp_pd_eq_oq - // CHECK: fcmp oeq <2 x double> %{{.*}}, %{{.*}} - return _mm_cmp_pd(a, b, _CMP_EQ_OQ); -} - -__m128d test_mm_cmp_pd_lt_os(__m128d a, __m128d b) { - // CHECK-LABEL: test_mm_cmp_pd_lt_os - // CHECK: fcmp olt <2 x double> %{{.*}}, %{{.*}} - return _mm_cmp_pd(a, b, _CMP_LT_OS); -} - -__m128d test_mm_cmp_pd_le_os(__m128d a, __m128d b) { - // CHECK-LABEL: test_mm_cmp_pd_le_os - // CHECK: fcmp ole <2 x double> %{{.*}}, %{{.*}} - return _mm_cmp_pd(a, b, _CMP_LE_OS); -} - -__m128d test_mm_cmp_pd_unord_q(__m128d a, __m128d b) { - // CHECK-LABEL: test_mm_cmp_pd_unord_q - // CHECK: fcmp uno <2 x double> %{{.*}}, %{{.*}} - return _mm_cmp_pd(a, b, _CMP_UNORD_Q); -} - -__m128d test_mm_cmp_pd_neq_uq(__m128d a, __m128d b) { - // CHECK-LABEL: test_mm_cmp_pd_neq_uq - // CHECK: fcmp une <2 x double> %{{.*}}, %{{.*}} - return _mm_cmp_pd(a, b, _CMP_NEQ_UQ); -} - -__m128d test_mm_cmp_pd_nlt_us(__m128d a, __m128d b) { - // CHECK-LABEL: test_mm_cmp_pd_nlt_us - // CHECK: fcmp uge <2 x double> %{{.*}}, %{{.*}} - return _mm_cmp_pd(a, b, _CMP_NLT_US); -} - -__m128d test_mm_cmp_pd_nle_us(__m128d a, __m128d b) { - // CHECK-LABEL: test_mm_cmp_pd_nle_us - // CHECK: fcmp ugt <2 x double> %{{.*}}, %{{.*}} - return _mm_cmp_pd(a, b, _CMP_NLE_US); -} - -__m128d test_mm_cmp_pd_ord_q(__m128d a, __m128d b) { - // CHECK-LABEL: test_mm_cmp_pd_ord_q - // CHECK: fcmp ord <2 x double> %{{.*}}, %{{.*}} - return _mm_cmp_pd(a, b, _CMP_ORD_Q); -} - -__m128d test_mm_cmp_sd(__m128d A, __m128d B) { - // CHECK-LABEL: test_mm_cmp_sd - // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 7) - return _mm_cmp_sd(A, B, _CMP_ORD_Q); -} >From c9a3d8905c02f50a0938a88cc391ded1b623884f Mon Sep 17 00:00:00 2001 From: Freddy Ye <freddy...@intel.com> Date: Wed, 6 Mar 2024 20:16:42 +0800 Subject: [PATCH 4/5] typo --- clang/test/CodeGen/X86/cmp-avx-builtins-error.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/test/CodeGen/X86/cmp-avx-builtins-error.c b/clang/test/CodeGen/X86/cmp-avx-builtins-error.c index 2b35aa84492b64..bad2606021b69e 100644 --- a/clang/test/CodeGen/X86/cmp-avx-builtins-error.c +++ b/clang/test/CodeGen/X86/cmp-avx-builtins-error.c @@ -14,9 +14,9 @@ __m128d test_mm_cmp_sd(__m128d a, __m128d b) { } __m128 test_mm_cmp_ps(__m128 a, __m128 b) { - return _mm_cmp_pd(a, b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + return _mm_cmp_ps(a, b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} } __m128 test_mm_cmp_ss(__m128 a, __m128 b) { - return _mm_cmp_sd(a, b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + return _mm_cmp_ss(a, b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} } >From 51eda069c797c91fd3072f347e1dd4a20ab53702 Mon Sep 17 00:00:00 2001 From: Freddy Ye <freddy...@intel.com> Date: Thu, 7 Mar 2024 09:44:52 +0800 Subject: [PATCH 5/5] Address comments. --- clang/lib/Headers/avxintrin.h | 235 ++++++++++++++++++++++++++++++++++ clang/lib/Headers/emmintrin.h | 50 -------- clang/lib/Headers/xmmintrin.h | 50 -------- 3 files changed, 235 insertions(+), 100 deletions(-) diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h index d6192518ea24ba..ecd9bf18a41ca0 100644 --- a/clang/lib/Headers/avxintrin.h +++ b/clang/lib/Headers/avxintrin.h @@ -1573,6 +1573,7 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c) ((__m256d)__builtin_ia32_shufpd256((__v4df)(__m256d)(a), \ (__v4df)(__m256d)(b), (int)(mask))) +/* Compare */ #define _CMP_EQ_UQ 0x08 /* Equal (unordered, non-signaling) */ #define _CMP_NGE_US 0x09 /* Not-greater-than-or-equal (unordered, signaling) */ #define _CMP_NGT_US 0x0a /* Not-greater-than (unordered, signaling) */ @@ -1598,6 +1599,124 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c) #define _CMP_GT_OQ 0x1e /* Greater-than (ordered, non-signaling) */ #define _CMP_TRUE_US 0x1f /* True (unordered, signaling) */ +/* Below intrinsic defined in emmintrin.h can be used for AVX */ +/// Compares each of the corresponding double-precision values of two +/// 128-bit vectors of [2 x double], using the operation specified by the +/// immediate integer operand. +/// +/// Returns a [2 x double] vector consisting of two doubles corresponding to +/// the two comparison results: zero if the comparison is false, and all 1's +/// if the comparison is true. +/// +/// \headerfile <x86intrin.h> +/// +/// \code +/// __m128d _mm_cmp_pd(__m128d a, __m128d b, const int c); +/// \endcode +/// +/// This intrinsic corresponds to the <c> VCMPPD </c> instruction. +/// +/// \param a +/// A 128-bit vector of [2 x double]. +/// \param b +/// A 128-bit vector of [2 x double]. +/// \param c +/// An immediate integer operand, with bits [4:0] specifying which comparison +/// operation to use: \n +/// 0x00: Equal (ordered, non-signaling) \n +/// 0x01: Less-than (ordered, signaling) \n +/// 0x02: Less-than-or-equal (ordered, signaling) \n +/// 0x03: Unordered (non-signaling) \n +/// 0x04: Not-equal (unordered, non-signaling) \n +/// 0x05: Not-less-than (unordered, signaling) \n +/// 0x06: Not-less-than-or-equal (unordered, signaling) \n +/// 0x07: Ordered (non-signaling) \n +/// 0x08: Equal (unordered, non-signaling) \n +/// 0x09: Not-greater-than-or-equal (unordered, signaling) \n +/// 0x0A: Not-greater-than (unordered, signaling) \n +/// 0x0B: False (ordered, non-signaling) \n +/// 0x0C: Not-equal (ordered, non-signaling) \n +/// 0x0D: Greater-than-or-equal (ordered, signaling) \n +/// 0x0E: Greater-than (ordered, signaling) \n +/// 0x0F: True (unordered, non-signaling) \n +/// 0x10: Equal (ordered, signaling) \n +/// 0x11: Less-than (ordered, non-signaling) \n +/// 0x12: Less-than-or-equal (ordered, non-signaling) \n +/// 0x13: Unordered (signaling) \n +/// 0x14: Not-equal (unordered, signaling) \n +/// 0x15: Not-less-than (unordered, non-signaling) \n +/// 0x16: Not-less-than-or-equal (unordered, non-signaling) \n +/// 0x17: Ordered (signaling) \n +/// 0x18: Equal (unordered, signaling) \n +/// 0x19: Not-greater-than-or-equal (unordered, non-signaling) \n +/// 0x1A: Not-greater-than (unordered, non-signaling) \n +/// 0x1B: False (ordered, signaling) \n +/// 0x1C: Not-equal (ordered, signaling) \n +/// 0x1D: Greater-than-or-equal (ordered, non-signaling) \n +/// 0x1E: Greater-than (ordered, non-signaling) \n +/// 0x1F: True (unordered, signaling) +/// \returns A 128-bit vector of [2 x double] containing the comparison results. +/// \fn __m128d _mm_cmp_pd(__m128d a, __m128d b, const int c) + +/* Below intrinsic defined in xmmintrin.h can be used for AVX */ +/// Compares each of the corresponding values of two 128-bit vectors of +/// [4 x float], using the operation specified by the immediate integer +/// operand. +/// +/// Returns a [4 x float] vector consisting of four floats corresponding to +/// the four comparison results: zero if the comparison is false, and all 1's +/// if the comparison is true. +/// +/// \headerfile <x86intrin.h> +/// +/// \code +/// __m128 _mm_cmp_ps(__m128 a, __m128 b, const int c); +/// \endcode +/// +/// This intrinsic corresponds to the <c> VCMPPS </c> instruction. +/// +/// \param a +/// A 128-bit vector of [4 x float]. +/// \param b +/// A 128-bit vector of [4 x float]. +/// \param c +/// An immediate integer operand, with bits [4:0] specifying which comparison +/// operation to use: \n +/// 0x00: Equal (ordered, non-signaling) \n +/// 0x01: Less-than (ordered, signaling) \n +/// 0x02: Less-than-or-equal (ordered, signaling) \n +/// 0x03: Unordered (non-signaling) \n +/// 0x04: Not-equal (unordered, non-signaling) \n +/// 0x05: Not-less-than (unordered, signaling) \n +/// 0x06: Not-less-than-or-equal (unordered, signaling) \n +/// 0x07: Ordered (non-signaling) \n +/// 0x08: Equal (unordered, non-signaling) \n +/// 0x09: Not-greater-than-or-equal (unordered, signaling) \n +/// 0x0A: Not-greater-than (unordered, signaling) \n +/// 0x0B: False (ordered, non-signaling) \n +/// 0x0C: Not-equal (ordered, non-signaling) \n +/// 0x0D: Greater-than-or-equal (ordered, signaling) \n +/// 0x0E: Greater-than (ordered, signaling) \n +/// 0x0F: True (unordered, non-signaling) \n +/// 0x10: Equal (ordered, signaling) \n +/// 0x11: Less-than (ordered, non-signaling) \n +/// 0x12: Less-than-or-equal (ordered, non-signaling) \n +/// 0x13: Unordered (signaling) \n +/// 0x14: Not-equal (unordered, signaling) \n +/// 0x15: Not-less-than (unordered, non-signaling) \n +/// 0x16: Not-less-than-or-equal (unordered, non-signaling) \n +/// 0x17: Ordered (signaling) \n +/// 0x18: Equal (unordered, signaling) \n +/// 0x19: Not-greater-than-or-equal (unordered, non-signaling) \n +/// 0x1A: Not-greater-than (unordered, non-signaling) \n +/// 0x1B: False (ordered, signaling) \n +/// 0x1C: Not-equal (ordered, signaling) \n +/// 0x1D: Greater-than-or-equal (ordered, non-signaling) \n +/// 0x1E: Greater-than (ordered, non-signaling) \n +/// 0x1F: True (unordered, signaling) +/// \returns A 128-bit vector of [4 x float] containing the comparison results. +/// \fn __m128 _mm_cmp_ps(__m128 a, __m128 b, const int c) + /// Compares each of the corresponding double-precision values of two /// 256-bit vectors of [4 x double], using the operation specified by the /// immediate integer operand. @@ -1718,6 +1837,122 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c) ((__m256)__builtin_ia32_cmpps256((__v8sf)(__m256)(a), \ (__v8sf)(__m256)(b), (c))) +/* Below intrinsic defined in emmintrin.h can be used for AVX */ +/// Compares each of the corresponding scalar double-precision values of +/// two 128-bit vectors of [2 x double], using the operation specified by the +/// immediate integer operand. +/// +/// If the result is true, all 64 bits of the destination vector are set; +/// otherwise they are cleared. +/// +/// \headerfile <x86intrin.h> +/// +/// \code +/// __m128d _mm_cmp_sd(__m128d a, __m128d b, const int c); +/// \endcode +/// +/// This intrinsic corresponds to the <c> VCMPSD </c> instruction. +/// +/// \param a +/// A 128-bit vector of [2 x double]. +/// \param b +/// A 128-bit vector of [2 x double]. +/// \param c +/// An immediate integer operand, with bits [4:0] specifying which comparison +/// operation to use: \n +/// 0x00: Equal (ordered, non-signaling) \n +/// 0x01: Less-than (ordered, signaling) \n +/// 0x02: Less-than-or-equal (ordered, signaling) \n +/// 0x03: Unordered (non-signaling) \n +/// 0x04: Not-equal (unordered, non-signaling) \n +/// 0x05: Not-less-than (unordered, signaling) \n +/// 0x06: Not-less-than-or-equal (unordered, signaling) \n +/// 0x07: Ordered (non-signaling) \n +/// 0x08: Equal (unordered, non-signaling) \n +/// 0x09: Not-greater-than-or-equal (unordered, signaling) \n +/// 0x0A: Not-greater-than (unordered, signaling) \n +/// 0x0B: False (ordered, non-signaling) \n +/// 0x0C: Not-equal (ordered, non-signaling) \n +/// 0x0D: Greater-than-or-equal (ordered, signaling) \n +/// 0x0E: Greater-than (ordered, signaling) \n +/// 0x0F: True (unordered, non-signaling) \n +/// 0x10: Equal (ordered, signaling) \n +/// 0x11: Less-than (ordered, non-signaling) \n +/// 0x12: Less-than-or-equal (ordered, non-signaling) \n +/// 0x13: Unordered (signaling) \n +/// 0x14: Not-equal (unordered, signaling) \n +/// 0x15: Not-less-than (unordered, non-signaling) \n +/// 0x16: Not-less-than-or-equal (unordered, non-signaling) \n +/// 0x17: Ordered (signaling) \n +/// 0x18: Equal (unordered, signaling) \n +/// 0x19: Not-greater-than-or-equal (unordered, non-signaling) \n +/// 0x1A: Not-greater-than (unordered, non-signaling) \n +/// 0x1B: False (ordered, signaling) \n +/// 0x1C: Not-equal (ordered, signaling) \n +/// 0x1D: Greater-than-or-equal (ordered, non-signaling) \n +/// 0x1E: Greater-than (ordered, non-signaling) \n +/// 0x1F: True (unordered, signaling) +/// \returns A 128-bit vector of [2 x double] containing the comparison results. +/// \fn __m128d _mm_cmp_sd(__m128d a, __m128d b, const int c) + +/* Below intrinsic defined in xmmintrin.h can be used for AVX */ +/// Compares each of the corresponding scalar values of two 128-bit +/// vectors of [4 x float], using the operation specified by the immediate +/// integer operand. +/// +/// If the result is true, all 32 bits of the destination vector are set; +/// otherwise they are cleared. +/// +/// \headerfile <x86intrin.h> +/// +/// \code +/// __m128 _mm_cmp_ss(__m128 a, __m128 b, const int c); +/// \endcode +/// +/// This intrinsic corresponds to the <c> VCMPSS </c> instruction. +/// +/// \param a +/// A 128-bit vector of [4 x float]. +/// \param b +/// A 128-bit vector of [4 x float]. +/// \param c +/// An immediate integer operand, with bits [4:0] specifying which comparison +/// operation to use: \n +/// 0x00: Equal (ordered, non-signaling) \n +/// 0x01: Less-than (ordered, signaling) \n +/// 0x02: Less-than-or-equal (ordered, signaling) \n +/// 0x03: Unordered (non-signaling) \n +/// 0x04: Not-equal (unordered, non-signaling) \n +/// 0x05: Not-less-than (unordered, signaling) \n +/// 0x06: Not-less-than-or-equal (unordered, signaling) \n +/// 0x07: Ordered (non-signaling) \n +/// 0x08: Equal (unordered, non-signaling) \n +/// 0x09: Not-greater-than-or-equal (unordered, signaling) \n +/// 0x0A: Not-greater-than (unordered, signaling) \n +/// 0x0B: False (ordered, non-signaling) \n +/// 0x0C: Not-equal (ordered, non-signaling) \n +/// 0x0D: Greater-than-or-equal (ordered, signaling) \n +/// 0x0E: Greater-than (ordered, signaling) \n +/// 0x0F: True (unordered, non-signaling) \n +/// 0x10: Equal (ordered, signaling) \n +/// 0x11: Less-than (ordered, non-signaling) \n +/// 0x12: Less-than-or-equal (ordered, non-signaling) \n +/// 0x13: Unordered (signaling) \n +/// 0x14: Not-equal (unordered, signaling) \n +/// 0x15: Not-less-than (unordered, non-signaling) \n +/// 0x16: Not-less-than-or-equal (unordered, non-signaling) \n +/// 0x17: Ordered (signaling) \n +/// 0x18: Equal (unordered, signaling) \n +/// 0x19: Not-greater-than-or-equal (unordered, non-signaling) \n +/// 0x1A: Not-greater-than (unordered, non-signaling) \n +/// 0x1B: False (ordered, signaling) \n +/// 0x1C: Not-equal (ordered, signaling) \n +/// 0x1D: Greater-than-or-equal (ordered, non-signaling) \n +/// 0x1E: Greater-than (ordered, non-signaling) \n +/// 0x1F: True (unordered, signaling) +/// \returns A 128-bit vector of [4 x float] containing the comparison results. +/// \fn __m128 _mm_cmp_ss(__m128 a, __m128 b, const int c) + /// Takes a [8 x i32] vector and returns the vector element value /// indexed by the immediate constant operand. /// diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h index db6d43ea4ad6fe..cfd2277bbd88cc 100644 --- a/clang/lib/Headers/emmintrin.h +++ b/clang/lib/Headers/emmintrin.h @@ -4768,7 +4768,6 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_castsi128_pd(__m128i __a) { /// \param c /// An immediate integer operand, with bits [4:0] specifying which comparison /// operation to use: \n -/// (Note that without avx enabled, only bits [2:0] are supported) \n /// 0x00: Equal (ordered, non-signaling) \n /// 0x01: Less-than (ordered, signaling) \n /// 0x02: Less-than-or-equal (ordered, signaling) \n @@ -4777,30 +4776,6 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_castsi128_pd(__m128i __a) { /// 0x05: Not-less-than (unordered, signaling) \n /// 0x06: Not-less-than-or-equal (unordered, signaling) \n /// 0x07: Ordered (non-signaling) \n -/// 0x08: Equal (unordered, non-signaling) \n -/// 0x09: Not-greater-than-or-equal (unordered, signaling) \n -/// 0x0A: Not-greater-than (unordered, signaling) \n -/// 0x0B: False (ordered, non-signaling) \n -/// 0x0C: Not-equal (ordered, non-signaling) \n -/// 0x0D: Greater-than-or-equal (ordered, signaling) \n -/// 0x0E: Greater-than (ordered, signaling) \n -/// 0x0F: True (unordered, non-signaling) \n -/// 0x10: Equal (ordered, signaling) \n -/// 0x11: Less-than (ordered, non-signaling) \n -/// 0x12: Less-than-or-equal (ordered, non-signaling) \n -/// 0x13: Unordered (signaling) \n -/// 0x14: Not-equal (unordered, signaling) \n -/// 0x15: Not-less-than (unordered, non-signaling) \n -/// 0x16: Not-less-than-or-equal (unordered, non-signaling) \n -/// 0x17: Ordered (signaling) \n -/// 0x18: Equal (unordered, signaling) \n -/// 0x19: Not-greater-than-or-equal (unordered, non-signaling) \n -/// 0x1A: Not-greater-than (unordered, non-signaling) \n -/// 0x1B: False (ordered, signaling) \n -/// 0x1C: Not-equal (ordered, signaling) \n -/// 0x1D: Greater-than-or-equal (ordered, non-signaling) \n -/// 0x1E: Greater-than (ordered, non-signaling) \n -/// 0x1F: True (unordered, signaling) /// \returns A 128-bit vector of [2 x double] containing the comparison results. #define _mm_cmp_pd(a, b, c) \ ((__m128d)__builtin_ia32_cmppd((__v2df)(__m128d)(a), (__v2df)(__m128d)(b), \ @@ -4828,7 +4803,6 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_castsi128_pd(__m128i __a) { /// \param c /// An immediate integer operand, with bits [4:0] specifying which comparison /// operation to use: \n -/// (Note that without avx enabled, only bits [2:0] are supported) \n /// 0x00: Equal (ordered, non-signaling) \n /// 0x01: Less-than (ordered, signaling) \n /// 0x02: Less-than-or-equal (ordered, signaling) \n @@ -4837,30 +4811,6 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_castsi128_pd(__m128i __a) { /// 0x05: Not-less-than (unordered, signaling) \n /// 0x06: Not-less-than-or-equal (unordered, signaling) \n /// 0x07: Ordered (non-signaling) \n -/// 0x08: Equal (unordered, non-signaling) \n -/// 0x09: Not-greater-than-or-equal (unordered, signaling) \n -/// 0x0A: Not-greater-than (unordered, signaling) \n -/// 0x0B: False (ordered, non-signaling) \n -/// 0x0C: Not-equal (ordered, non-signaling) \n -/// 0x0D: Greater-than-or-equal (ordered, signaling) \n -/// 0x0E: Greater-than (ordered, signaling) \n -/// 0x0F: True (unordered, non-signaling) \n -/// 0x10: Equal (ordered, signaling) \n -/// 0x11: Less-than (ordered, non-signaling) \n -/// 0x12: Less-than-or-equal (ordered, non-signaling) \n -/// 0x13: Unordered (signaling) \n -/// 0x14: Not-equal (unordered, signaling) \n -/// 0x15: Not-less-than (unordered, non-signaling) \n -/// 0x16: Not-less-than-or-equal (unordered, non-signaling) \n -/// 0x17: Ordered (signaling) \n -/// 0x18: Equal (unordered, signaling) \n -/// 0x19: Not-greater-than-or-equal (unordered, non-signaling) \n -/// 0x1A: Not-greater-than (unordered, non-signaling) \n -/// 0x1B: False (ordered, signaling) \n -/// 0x1C: Not-equal (ordered, signaling) \n -/// 0x1D: Greater-than-or-equal (ordered, non-signaling) \n -/// 0x1E: Greater-than (ordered, non-signaling) \n -/// 0x1F: True (unordered, signaling) /// \returns A 128-bit vector of [2 x double] containing the comparison results. #define _mm_cmp_sd(a, b, c) \ ((__m128d)__builtin_ia32_cmpsd((__v2df)(__m128d)(a), (__v2df)(__m128d)(b), \ diff --git a/clang/lib/Headers/xmmintrin.h b/clang/lib/Headers/xmmintrin.h index f9894c23ce25bf..a99381ac26207e 100644 --- a/clang/lib/Headers/xmmintrin.h +++ b/clang/lib/Headers/xmmintrin.h @@ -2973,7 +2973,6 @@ _mm_movemask_ps(__m128 __a) /// \param c /// An immediate integer operand, with bits [4:0] specifying which comparison /// operation to use: \n -/// (Note that without avx enabled, only bits [2:0] are supported) \n /// 0x00: Equal (ordered, non-signaling) \n /// 0x01: Less-than (ordered, signaling) \n /// 0x02: Less-than-or-equal (ordered, signaling) \n @@ -2982,30 +2981,6 @@ _mm_movemask_ps(__m128 __a) /// 0x05: Not-less-than (unordered, signaling) \n /// 0x06: Not-less-than-or-equal (unordered, signaling) \n /// 0x07: Ordered (non-signaling) \n -/// 0x08: Equal (unordered, non-signaling) \n -/// 0x09: Not-greater-than-or-equal (unordered, signaling) \n -/// 0x0A: Not-greater-than (unordered, signaling) \n -/// 0x0B: False (ordered, non-signaling) \n -/// 0x0C: Not-equal (ordered, non-signaling) \n -/// 0x0D: Greater-than-or-equal (ordered, signaling) \n -/// 0x0E: Greater-than (ordered, signaling) \n -/// 0x0F: True (unordered, non-signaling) \n -/// 0x10: Equal (ordered, signaling) \n -/// 0x11: Less-than (ordered, non-signaling) \n -/// 0x12: Less-than-or-equal (ordered, non-signaling) \n -/// 0x13: Unordered (signaling) \n -/// 0x14: Not-equal (unordered, signaling) \n -/// 0x15: Not-less-than (unordered, non-signaling) \n -/// 0x16: Not-less-than-or-equal (unordered, non-signaling) \n -/// 0x17: Ordered (signaling) \n -/// 0x18: Equal (unordered, signaling) \n -/// 0x19: Not-greater-than-or-equal (unordered, non-signaling) \n -/// 0x1A: Not-greater-than (unordered, non-signaling) \n -/// 0x1B: False (ordered, signaling) \n -/// 0x1C: Not-equal (ordered, signaling) \n -/// 0x1D: Greater-than-or-equal (ordered, non-signaling) \n -/// 0x1E: Greater-than (ordered, non-signaling) \n -/// 0x1F: True (unordered, signaling) /// \returns A 128-bit vector of [4 x float] containing the comparison results. #define _mm_cmp_ps(a, b, c) \ ((__m128)__builtin_ia32_cmpps((__v4sf)(__m128)(a), (__v4sf)(__m128)(b), (c))) @@ -3032,7 +3007,6 @@ _mm_movemask_ps(__m128 __a) /// \param c /// An immediate integer operand, with bits [4:0] specifying which comparison /// operation to use: \n -/// (Note that without avx enabled, only bits [2:0] are supported) \n /// 0x00: Equal (ordered, non-signaling) \n /// 0x01: Less-than (ordered, signaling) \n /// 0x02: Less-than-or-equal (ordered, signaling) \n @@ -3041,30 +3015,6 @@ _mm_movemask_ps(__m128 __a) /// 0x05: Not-less-than (unordered, signaling) \n /// 0x06: Not-less-than-or-equal (unordered, signaling) \n /// 0x07: Ordered (non-signaling) \n -/// 0x08: Equal (unordered, non-signaling) \n -/// 0x09: Not-greater-than-or-equal (unordered, signaling) \n -/// 0x0A: Not-greater-than (unordered, signaling) \n -/// 0x0B: False (ordered, non-signaling) \n -/// 0x0C: Not-equal (ordered, non-signaling) \n -/// 0x0D: Greater-than-or-equal (ordered, signaling) \n -/// 0x0E: Greater-than (ordered, signaling) \n -/// 0x0F: True (unordered, non-signaling) \n -/// 0x10: Equal (ordered, signaling) \n -/// 0x11: Less-than (ordered, non-signaling) \n -/// 0x12: Less-than-or-equal (ordered, non-signaling) \n -/// 0x13: Unordered (signaling) \n -/// 0x14: Not-equal (unordered, signaling) \n -/// 0x15: Not-less-than (unordered, non-signaling) \n -/// 0x16: Not-less-than-or-equal (unordered, non-signaling) \n -/// 0x17: Ordered (signaling) \n -/// 0x18: Equal (unordered, signaling) \n -/// 0x19: Not-greater-than-or-equal (unordered, non-signaling) \n -/// 0x1A: Not-greater-than (unordered, non-signaling) \n -/// 0x1B: False (ordered, signaling) \n -/// 0x1C: Not-equal (ordered, signaling) \n -/// 0x1D: Greater-than-or-equal (ordered, non-signaling) \n -/// 0x1E: Greater-than (ordered, non-signaling) \n -/// 0x1F: True (unordered, signaling) /// \returns A 128-bit vector of [4 x float] containing the comparison results. #define _mm_cmp_ss(a, b, c) \ ((__m128)__builtin_ia32_cmpss((__v4sf)(__m128)(a), (__v4sf)(__m128)(b), (c))) _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits