llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-backend-x86 Author: why (whytolearn) <details> <summary>Changes</summary> [Clang] VectorExprEvaluator::VisitCallExpr / InterpretBuiltin - add MMX/SSE/AVX PHADD/SUB & HADDPS/D intrinsics to be used in constexpr #<!-- -->155395 cover func: _mm_hadd_pi16 _mm_hadd_epi16 _mm256_hadd_epi16 _mm_hadd_pi32 _mm_hadd_epi32 _mm256_hadd_epi32 _mm_hadds_pi16 _mm_hadds_epi16 _mm256_hadds_epi16 _mm_hsub_pi16 _mm_hsub_epi16 _mm256_hsub_epi16 _mm_hsub_pi32 _mm_hsub_epi32 _mm256_hsub_epi32 _mm_hsubs_pi16 _mm_hsubs_epi16 _mm256_hsubs_epi16 _mm_hadd_pd _mm256_hadd_pd _mm_hadd_ps _mm256_hadd_ps _mm_hsub_pd _mm256_hsub_pd _mm_hsub_ps _mm256_hsub_ps --- Patch is 32.77 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/156822.diff 8 Files Affected: - (modified) clang/lib/Headers/avx2intrin.h (+18-24) - (modified) clang/lib/Headers/avxintrin.h (+8-12) - (modified) clang/lib/Headers/pmmintrin.h (+8-12) - (modified) clang/lib/Headers/tmmintrin.h (+52-67) - (modified) clang/test/CodeGen/X86/avx-builtins.c (+29) - (modified) clang/test/CodeGen/X86/avx2-builtins.c (+63) - (modified) clang/test/CodeGen/X86/mmx-builtins.c (+48) - (modified) clang/test/CodeGen/X86/ssse3-builtins.c (+49) ``````````diff diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h index 4d03103ac1e08..54fe41cad6a46 100644 --- a/clang/lib/Headers/avx2intrin.h +++ b/clang/lib/Headers/avx2intrin.h @@ -854,10 +854,9 @@ _mm256_cmpgt_epi64(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [16 x i16] containing one of the source operands. /// \returns A 256-bit vector of [16 x i16] containing the sums. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_hadd_epi16(__m256i __a, __m256i __b) -{ - return (__m256i)__builtin_ia32_phaddw256((__v16hi)__a, (__v16hi)__b); +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_hadd_epi16(__m256i __a, __m256i __b) { + return (__m256i)__builtin_ia32_phaddw256((__v16hi)__a, (__v16hi)__b); } /// Horizontally adds the adjacent pairs of 32-bit integers from two 256-bit @@ -886,10 +885,9 @@ _mm256_hadd_epi16(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [8 x i32] containing one of the source operands. /// \returns A 256-bit vector of [8 x i32] containing the sums. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_hadd_epi32(__m256i __a, __m256i __b) -{ - return (__m256i)__builtin_ia32_phaddd256((__v8si)__a, (__v8si)__b); +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_hadd_epi32(__m256i __a, __m256i __b) { + return (__m256i)__builtin_ia32_phaddd256((__v8si)__a, (__v8si)__b); } /// Horizontally adds the adjacent pairs of 16-bit integers from two 256-bit @@ -921,10 +919,9 @@ _mm256_hadd_epi32(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [16 x i16] containing one of the source operands. /// \returns A 256-bit vector of [16 x i16] containing the sums. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_hadds_epi16(__m256i __a, __m256i __b) -{ - return (__m256i)__builtin_ia32_phaddsw256((__v16hi)__a, (__v16hi)__b); +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_hadds_epi16(__m256i __a, __m256i __b) { + return (__m256i)__builtin_ia32_phaddsw256((__v16hi)__a, (__v16hi)__b); } /// Horizontally subtracts adjacent pairs of 16-bit integers from two 256-bit @@ -957,10 +954,9 @@ _mm256_hadds_epi16(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [16 x i16] containing one of the source operands. /// \returns A 256-bit vector of [16 x i16] containing the differences. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_hsub_epi16(__m256i __a, __m256i __b) -{ - return (__m256i)__builtin_ia32_phsubw256((__v16hi)__a, (__v16hi)__b); +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_hsub_epi16(__m256i __a, __m256i __b) { + return (__m256i)__builtin_ia32_phsubw256((__v16hi)__a, (__v16hi)__b); } /// Horizontally subtracts adjacent pairs of 32-bit integers from two 256-bit @@ -989,10 +985,9 @@ _mm256_hsub_epi16(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [8 x i32] containing one of the source operands. /// \returns A 256-bit vector of [8 x i32] containing the differences. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_hsub_epi32(__m256i __a, __m256i __b) -{ - return (__m256i)__builtin_ia32_phsubd256((__v8si)__a, (__v8si)__b); +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_hsub_epi32(__m256i __a, __m256i __b) { + return (__m256i)__builtin_ia32_phsubd256((__v8si)__a, (__v8si)__b); } /// Horizontally subtracts adjacent pairs of 16-bit integers from two 256-bit @@ -1025,10 +1020,9 @@ _mm256_hsub_epi32(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [16 x i16] containing one of the source operands. /// \returns A 256-bit vector of [16 x i16] containing the differences. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_hsubs_epi16(__m256i __a, __m256i __b) -{ - return (__m256i)__builtin_ia32_phsubsw256((__v16hi)__a, (__v16hi)__b); +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_hsubs_epi16(__m256i __a, __m256i __b) { + return (__m256i)__builtin_ia32_phsubsw256((__v16hi)__a, (__v16hi)__b); } /// Multiplies each unsigned byte from the 256-bit integer vector in \a __a diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h index 03c034f64cbea..fb7110f145c38 100644 --- a/clang/lib/Headers/avxintrin.h +++ b/clang/lib/Headers/avxintrin.h @@ -703,9 +703,8 @@ _mm256_xor_ps(__m256 __a, __m256 __b) /// elements of a vector of [4 x double]. /// \returns A 256-bit vector of [4 x double] containing the horizontal sums of /// both operands. -static __inline __m256d __DEFAULT_FN_ATTRS -_mm256_hadd_pd(__m256d __a, __m256d __b) -{ +static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_hadd_pd(__m256d __a, __m256d __b) { return (__m256d)__builtin_ia32_haddpd256((__v4df)__a, (__v4df)__b); } @@ -726,9 +725,8 @@ _mm256_hadd_pd(__m256d __a, __m256d __b) /// index 2, 3, 6, 7 of a vector of [8 x float]. /// \returns A 256-bit vector of [8 x float] containing the horizontal sums of /// both operands. -static __inline __m256 __DEFAULT_FN_ATTRS -_mm256_hadd_ps(__m256 __a, __m256 __b) -{ +static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_hadd_ps(__m256 __a, + __m256 __b) { return (__m256)__builtin_ia32_haddps256((__v8sf)__a, (__v8sf)__b); } @@ -749,9 +747,8 @@ _mm256_hadd_ps(__m256 __a, __m256 __b) /// odd-indexed elements of a vector of [4 x double]. /// \returns A 256-bit vector of [4 x double] containing the horizontal /// differences of both operands. -static __inline __m256d __DEFAULT_FN_ATTRS -_mm256_hsub_pd(__m256d __a, __m256d __b) -{ +static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_hsub_pd(__m256d __a, __m256d __b) { return (__m256d)__builtin_ia32_hsubpd256((__v4df)__a, (__v4df)__b); } @@ -772,9 +769,8 @@ _mm256_hsub_pd(__m256d __a, __m256d __b) /// elements with index 2, 3, 6, 7 of a vector of [8 x float]. /// \returns A 256-bit vector of [8 x float] containing the horizontal /// differences of both operands. -static __inline __m256 __DEFAULT_FN_ATTRS -_mm256_hsub_ps(__m256 __a, __m256 __b) -{ +static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_hsub_ps(__m256 __a, + __m256 __b) { return (__m256)__builtin_ia32_hsubps256((__v8sf)__a, (__v8sf)__b); } diff --git a/clang/lib/Headers/pmmintrin.h b/clang/lib/Headers/pmmintrin.h index cd605df7fb52d..67f2a7ffd1f56 100644 --- a/clang/lib/Headers/pmmintrin.h +++ b/clang/lib/Headers/pmmintrin.h @@ -89,9 +89,8 @@ _mm_addsub_ps(__m128 __a, __m128 __b) /// destination. /// \returns A 128-bit vector of [4 x float] containing the horizontal sums of /// both operands. -static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_hadd_ps(__m128 __a, __m128 __b) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hadd_ps(__m128 __a, + __m128 __b) { return __builtin_ia32_haddps((__v4sf)__a, (__v4sf)__b); } @@ -174,9 +173,8 @@ _mm_moveldup_ps(__m128 __a) /// A 128-bit vector of [2 x double] containing the right source operand. /// \returns A 128-bit vector of [2 x double] containing the alternating sums /// and differences of both operands. -static __inline__ __m128d __DEFAULT_FN_ATTRS -_mm_addsub_pd(__m128d __a, __m128d __b) -{ +static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_addsub_pd(__m128d __a, __m128d __b) { return __builtin_ia32_addsubpd((__v2df)__a, (__v2df)__b); } @@ -197,9 +195,8 @@ _mm_addsub_pd(__m128d __a, __m128d __b) /// destination. /// \returns A 128-bit vector of [2 x double] containing the horizontal sums of /// both operands. -static __inline__ __m128d __DEFAULT_FN_ATTRS -_mm_hadd_pd(__m128d __a, __m128d __b) -{ +static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_hadd_pd(__m128d __a, __m128d __b) { return __builtin_ia32_haddpd((__v2df)__a, (__v2df)__b); } @@ -220,9 +217,8 @@ _mm_hadd_pd(__m128d __a, __m128d __b) /// the destination. /// \returns A 128-bit vector of [2 x double] containing the horizontal /// differences of both operands. -static __inline__ __m128d __DEFAULT_FN_ATTRS -_mm_hsub_pd(__m128d __a, __m128d __b) -{ +static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_hsub_pd(__m128d __a, __m128d __b) { return __builtin_ia32_hsubpd((__v2df)__a, (__v2df)__b); } diff --git a/clang/lib/Headers/tmmintrin.h b/clang/lib/Headers/tmmintrin.h index f01c61afa8ea2..b408c6a3404ec 100644 --- a/clang/lib/Headers/tmmintrin.h +++ b/clang/lib/Headers/tmmintrin.h @@ -204,10 +204,9 @@ _mm_abs_epi32(__m128i __a) { /// destination. /// \returns A 128-bit vector of [8 x i16] containing the horizontal sums of /// both operands. -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_hadd_epi16(__m128i __a, __m128i __b) -{ - return (__m128i)__builtin_ia32_phaddw128((__v8hi)__a, (__v8hi)__b); +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_hadd_epi16(__m128i __a, __m128i __b) { + return (__m128i)__builtin_ia32_phaddw128((__v8hi)__a, (__v8hi)__b); } /// Horizontally adds the adjacent pairs of values contained in 2 packed @@ -227,10 +226,9 @@ _mm_hadd_epi16(__m128i __a, __m128i __b) /// destination. /// \returns A 128-bit vector of [4 x i32] containing the horizontal sums of /// both operands. -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_hadd_epi32(__m128i __a, __m128i __b) -{ - return (__m128i)__builtin_ia32_phaddd128((__v4si)__a, (__v4si)__b); +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_hadd_epi32(__m128i __a, __m128i __b) { + return (__m128i)__builtin_ia32_phaddd128((__v4si)__a, (__v4si)__b); } /// Horizontally adds the adjacent pairs of values contained in 2 packed @@ -250,11 +248,10 @@ _mm_hadd_epi32(__m128i __a, __m128i __b) /// destination. /// \returns A 64-bit vector of [4 x i16] containing the horizontal sums of both /// operands. -static __inline__ __m64 __DEFAULT_FN_ATTRS -_mm_hadd_pi16(__m64 __a, __m64 __b) -{ - return __trunc64(__builtin_ia32_phaddw128( - (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){})); +static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hadd_pi16(__m64 __a, + __m64 __b) { + return __trunc64(__builtin_ia32_phaddw128( + (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){})); } /// Horizontally adds the adjacent pairs of values contained in 2 packed @@ -274,11 +271,10 @@ _mm_hadd_pi16(__m64 __a, __m64 __b) /// destination. /// \returns A 64-bit vector of [2 x i32] containing the horizontal sums of both /// operands. -static __inline__ __m64 __DEFAULT_FN_ATTRS -_mm_hadd_pi32(__m64 __a, __m64 __b) -{ - return __trunc64(__builtin_ia32_phaddd128( - (__v4si)__builtin_shufflevector(__a, __b, 0, 1), (__v4si){})); +static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hadd_pi32(__m64 __a, + __m64 __b) { + return __trunc64(__builtin_ia32_phaddd128( + (__v4si)__builtin_shufflevector(__a, __b, 0, 1), (__v4si){})); } /// Horizontally adds, with saturation, the adjacent pairs of values contained @@ -301,10 +297,9 @@ _mm_hadd_pi32(__m64 __a, __m64 __b) /// destination. /// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated /// sums of both operands. -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_hadds_epi16(__m128i __a, __m128i __b) -{ - return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__a, (__v8hi)__b); +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_hadds_epi16(__m128i __a, __m128i __b) { + return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__a, (__v8hi)__b); } /// Horizontally adds, with saturation, the adjacent pairs of values contained @@ -327,11 +322,10 @@ _mm_hadds_epi16(__m128i __a, __m128i __b) /// destination. /// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated /// sums of both operands. -static __inline__ __m64 __DEFAULT_FN_ATTRS -_mm_hadds_pi16(__m64 __a, __m64 __b) -{ - return __trunc64(__builtin_ia32_phaddsw128( - (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){})); +static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hadds_pi16(__m64 __a, + __m64 __b) { + return __trunc64(__builtin_ia32_phaddsw128( + (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){})); } /// Horizontally subtracts the adjacent pairs of values contained in 2 @@ -351,10 +345,9 @@ _mm_hadds_pi16(__m64 __a, __m64 __b) /// the destination. /// \returns A 128-bit vector of [8 x i16] containing the horizontal differences /// of both operands. -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_hsub_epi16(__m128i __a, __m128i __b) -{ - return (__m128i)__builtin_ia32_phsubw128((__v8hi)__a, (__v8hi)__b); +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_hsub_epi16(__m128i __a, __m128i __b) { + return (__m128i)__builtin_ia32_phsubw128((__v8hi)__a, (__v8hi)__b); } /// Horizontally subtracts the adjacent pairs of values contained in 2 @@ -374,10 +367,9 @@ _mm_hsub_epi16(__m128i __a, __m128i __b) /// the destination. /// \returns A 128-bit vector of [4 x i32] containing the horizontal differences /// of both operands. -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_hsub_epi32(__m128i __a, __m128i __b) -{ - return (__m128i)__builtin_ia32_phsubd128((__v4si)__a, (__v4si)__b); +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_hsub_epi32(__m128i __a, __m128i __b) { + return (__m128i)__builtin_ia32_phsubd128((__v4si)__a, (__v4si)__b); } /// Horizontally subtracts the adjacent pairs of values contained in 2 @@ -397,11 +389,10 @@ _mm_hsub_epi32(__m128i __a, __m128i __b) /// the destination. /// \returns A 64-bit vector of [4 x i16] containing the horizontal differences /// of both operands. -static __inline__ __m64 __DEFAULT_FN_ATTRS -_mm_hsub_pi16(__m64 __a, __m64 __b) -{ - return __trunc64(__builtin_ia32_phsubw128( - (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){})); +static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hsub_pi16(__m64 __a, + __m64 __b) { + return __trunc64(__builtin_ia32_phsubw128( + (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){})); } /// Horizontally subtracts the adjacent pairs of values contained in 2 @@ -421,11 +412,10 @@ _mm_hsub_pi16(__m64 __a, __m64 __b) /// the destination. /// \returns A 64-bit vector of [2 x i32] containing the horizontal differences /// of both operands. -static __inline__ __m64 __DEFAULT_FN_ATTRS -_mm_hsub_pi32(__m64 __a, __m64 __b) -{ - return __trunc64(__builtin_ia32_phsubd128( - (__v4si)__builtin_shufflevector(__a, __b, 0, 1), (__v4si){})); +static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hsub_pi32(__m64 __a, + __m64 __b) { + return __trunc64(__builtin_ia32_phsubd128( + (__v4si)__builtin_shufflevector(__a, __b, 0, 1), (__v4si){})); } /// Horizontally subtracts, with saturation, the adjacent pairs of values @@ -448,10 +438,9 @@ _mm_hsub_pi32(__m64 __a, __m64 __b) /// the destination. /// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated /// differences of both operands. -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_hsubs_epi16(__m128i __a, __m128i __b) -{ - return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__a, (__v8hi)__b); +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_hsubs_epi16(__m128i __a, __m128i __b) { + return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__a, (__v8hi)__b); } /// Horizontally subtracts, with saturation, the adjacent pairs of values @@ -474,11 +463,10 @@ _mm_hsubs_epi16(__m128i __a, __m128i __b) /// the destination. /// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated /// differences of both operands. -static __inline__ __m64 __DEFAULT_FN_ATTRS -_mm_hsubs_pi16(__m64 __a, __m64 __b) -{ - return __trunc64(__builtin_ia32_phsubsw128( - (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){})); +static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hsubs_pi16(__m64 __a, + __m64 __b) { + return __trunc64(__builtin_ia32_phsubsw128( + (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){})); } /// Multiplies corresponding pairs of packed 8-bit unsigned integer @@ -509,10 +497,9 @@ _mm_hsubs_pi16(__m64 __a, __m64 __b) /// \a R5 := (\a __a10 * \a __b10) + (\a __a11 * \a __b11) \n /// \a R6 := (\a __a12 * \a __b12) + (\a __a13 * \a __b13) \n /// \a R7 := (\a __a14 * \a __b14) + (\a __a15 * \a __b15) -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_maddubs_epi16(__m128i __a, __m128i __b) -{ - return (__m128i)__builtin_ia32_pmaddubsw128((__v16qi)__a, (__v16qi)__b); +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_maddubs_epi16(__m128i __a, __m128i __b) { + return (__m128i)__builtin_ia32_pmaddubsw128((__v16qi)__a, (__v16qi)__b); } /// Multiplies corresponding pairs of packed 8-bit unsigned integer @@ -539,11 +526,10 @@ _mm_maddubs_epi16(__m128i __a, __m128i __b) /// \a R1 := (\a __a2 * \a __b2) + (\a __a3 * \a __b3) \n /// \a R2 := (\a __a4 * \a __b4) + (\a __a5 * \a __b5) \n /// \a R3 := (\a __a6 * \a __b6) + (\a __a7 * \a __b7) -static __inline__ __m64 __DEFAULT_FN_ATTRS -_mm_maddubs_pi16(__m64 __a, __m64 __b) -{ - return __trunc64(__builtin_ia32_pmaddubsw128((__v16qi)__anyext128(__a), - (__v16qi)__anyext128(__b))); +static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_maddubs_pi16(__m64 __a, __m64 __b) { + return __trunc64(__builtin_ia32_pmaddubsw128((__v16qi)__anyext128(__a), + (__v16qi)__anyext128(__b))); } /// Multiplies packed 16-bit signed integer values, truncates the 32-bit @@ -560,10 +546,9 @@ _mm_maddubs_pi16(__m64 __a, __m64 __b) /// A 128-bit vector of [8 x i16] containing one of the source operands. /// \returns A 128-bit vector of [8 x i16] containing the rounded and scaled /// products of both operands. -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_mulhrs_epi16(__m128i __a, __m128i __b) -{ - return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b); +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_mulhrs_epi16(__m128i __a, __m128i __b) { + return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b); } /// Multiplies packed 16-bit signed integer values, truncates the 32-bit diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c index 622ac5d50aaf0..9857b84c94112 100644 --- a/clang/test/CodeGen/X86/avx-builtins.c +++ b/clang/test/CodeGen/X86/avx-builtins.c @@ -1083,24 +1083,53 @@ __m256d test_mm256_hadd_pd(__m256d A, __m256d B) { // CHECK: call {{.*}}<4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}) return _mm256_hadd_pd(A, B); } +constexpr bool test_mm256_hadd_epi32_constexpr() { + constexpr __m256d a = _mm256_set_pd(1.0, 2.0, 3.0, 4.0); + constexpr __m256d b = _mm256_set_pd(5.0, 6.0, 7.0, 8.0); + constexpr __m256d result = _mm256_hadd_pd(a, b); + return match_m256d(result,1.0+2.0,3.0+4.0,5.0+6.0,7.0+8.0); +} +TEST_CONSTEXPR(test_mm256_hadd_epi32_constexpr()) __m256 test_mm256_hadd_ps(__m256 A, __m256 B) { // CHECK-LABEL: test_mm256_hadd_ps // CHECK: call {{.*}}<8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}) return _mm256_hadd_ps(A, B); } +constexpr bool test_mm256_hadd_ps_constexpr() { + constexpr __m256 a = _mm256_set_ps(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f); + constexpr __m256 b = _mm256_set_ps(9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f); + constexpr __m256 result = _mm256_hadd_ps(a, b); + return match_m256(result,1.0f+2.0f,3.0f+4.0f,5.0f+6.0f,7.0f+8.0f, + 9.0f+10.0f,11.0f+12.0f,13.0f+14.0f,15.0f+16.0f); +} +TEST_CONSTEXPR(test_mm256_hadd_ps_constexpr()) ... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/156822 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits