Author: ctopper
Date: Mon May 16 01:38:42 2016
New Revision: 269632

URL: http://llvm.org/viewvc/llvm-project?rev=269632&view=rev
Log:
[X86] Add typecasts to remove most assumptions about what __m128i/__m256i is defined as. Add similar typecasts for the fp types as well.
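For readers unfamiliar with the pattern, the sketch below (not part of the patch) shows what these typecasts accomplish, using only the clang/GCC vector extensions the headers already rely on. The *_demo typedefs are hypothetical local stand-ins for __m128i, __m128d, __v2di and __v2df; the idea is that each lane-wise operation is performed on an explicitly element-typed vector and the result is cast back to the opaque public type, so the code no longer assumes which element type __m128i or __m128d happens to be defined with.

/* Minimal sketch of the cast pattern applied throughout this patch.
   The _demo typedefs are stand-ins for the real intrinsic types. */
typedef long long __m128i_demo __attribute__((__vector_size__(16)));
typedef double    __m128d_demo __attribute__((__vector_size__(16)));
typedef long long __v2di_demo  __attribute__((__vector_size__(16))); /* 2 x i64 */
typedef double    __v2df_demo  __attribute__((__vector_size__(16))); /* 2 x f64 */

/* Before: the addition only performs 64-bit lane arithmetic if __m128i
   itself happens to be defined with 64-bit elements. */
static inline __m128i_demo add_epi64_before(__m128i_demo __a, __m128i_demo __b)
{
  return __a + __b;
}

/* After: cast to the element-typed vector, operate there, cast back.
   The casts only reinterpret the bits between same-sized vector types,
   but the element width of the operation is now explicit. */
static inline __m128i_demo add_epi64_after(__m128i_demo __a, __m128i_demo __b)
{
  return (__m128i_demo)((__v2di_demo)__a + (__v2di_demo)__b);
}

/* The same pattern for the floating-point types mentioned in the log. */
static inline __m128d_demo add_pd_after(__m128d_demo __a, __m128d_demo __b)
{
  return (__m128d_demo)((__v2df_demo)__a + (__v2df_demo)__b);
}
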
Modified: cfe/trunk/lib/Headers/__wmmintrin_aes.h cfe/trunk/lib/Headers/avx2intrin.h cfe/trunk/lib/Headers/avxintrin.h cfe/trunk/lib/Headers/emmintrin.h cfe/trunk/lib/Headers/fma4intrin.h cfe/trunk/lib/Headers/fmaintrin.h cfe/trunk/lib/Headers/mmintrin.h cfe/trunk/lib/Headers/pmmintrin.h cfe/trunk/lib/Headers/xmmintrin.h cfe/trunk/lib/Headers/xopintrin.h Modified: cfe/trunk/lib/Headers/__wmmintrin_aes.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/__wmmintrin_aes.h?rev=269632&r1=269631&r2=269632&view=diff ============================================================================== --- cfe/trunk/lib/Headers/__wmmintrin_aes.h (original) +++ cfe/trunk/lib/Headers/__wmmintrin_aes.h Mon May 16 01:38:42 2016 @@ -45,7 +45,7 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_aesenc_si128(__m128i __V, __m128i __R) { - return (__m128i)__builtin_ia32_aesenc128(__V, __R); + return (__m128i)__builtin_ia32_aesenc128((__v2di)__V, (__v2di)__R); } /// \brief Performs the final round of AES encryption using the Equivalent @@ -65,7 +65,7 @@ _mm_aesenc_si128(__m128i __V, __m128i __ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_aesenclast_si128(__m128i __V, __m128i __R) { - return (__m128i)__builtin_ia32_aesenclast128(__V, __R); + return (__m128i)__builtin_ia32_aesenclast128((__v2di)__V, (__v2di)__R); } /// \brief Performs a single round of AES decryption using the Equivalent @@ -85,7 +85,7 @@ _mm_aesenclast_si128(__m128i __V, __m128 static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_aesdec_si128(__m128i __V, __m128i __R) { - return (__m128i)__builtin_ia32_aesdec128(__V, __R); + return (__m128i)__builtin_ia32_aesdec128((__v2di)__V, (__v2di)__R); } /// \brief Performs the final round of AES decryption using the Equivalent @@ -105,7 +105,7 @@ _mm_aesdec_si128(__m128i __V, __m128i __ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_aesdeclast_si128(__m128i __V, __m128i __R) { - return (__m128i)__builtin_ia32_aesdeclast128(__V, __R); + return (__m128i)__builtin_ia32_aesdeclast128((__v2di)__V, (__v2di)__R); } /// \brief Applies the AES InvMixColumns() transformation to an expanded key @@ -122,7 +122,7 @@ _mm_aesdeclast_si128(__m128i __V, __m128 static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_aesimc_si128(__m128i __V) { - return (__m128i)__builtin_ia32_aesimc128(__V); + return (__m128i)__builtin_ia32_aesimc128((__v2di)__V); } /// \brief Generates a round key for AES encyption, operating on 128-bit data Modified: cfe/trunk/lib/Headers/avx2intrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx2intrin.h?rev=269632&r1=269631&r2=269632&view=diff ============================================================================== --- cfe/trunk/lib/Headers/avx2intrin.h (original) +++ cfe/trunk/lib/Headers/avx2intrin.h Mon May 16 01:38:42 2016 @@ -97,7 +97,7 @@ _mm256_add_epi32(__m256i __a, __m256i __ static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_add_epi64(__m256i __a, __m256i __b) { - return __a + __b; + return (__m256i)((__v4di)__a + (__v4di)__b); } static __inline__ __m256i __DEFAULT_FN_ATTRS @@ -131,13 +131,13 @@ _mm256_adds_epu16(__m256i __a, __m256i _ static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_and_si256(__m256i __a, __m256i __b) { - return __a & __b; + return (__m256i)((__v4di)__a & (__v4di)__b); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_andnot_si256(__m256i __a, __m256i __b) { - return ~__a & __b; + return (__m256i)(~(__v4di)__a & (__v4di)__b); } static __inline__ __m256i __DEFAULT_FN_ATTRS @@ -200,7 +200,7 @@ _mm256_cmpeq_epi32(__m256i __a, __m256i 
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cmpeq_epi64(__m256i __a, __m256i __b) { - return (__m256i)(__a == __b); + return (__m256i)((__v4di)__a == (__v4di)__b); } static __inline__ __m256i __DEFAULT_FN_ATTRS @@ -226,7 +226,7 @@ _mm256_cmpgt_epi32(__m256i __a, __m256i static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cmpgt_epi64(__m256i __a, __m256i __b) { - return (__m256i)(__a > __b); + return (__m256i)((__v4di)__a > (__v4di)__b); } static __inline__ __m256i __DEFAULT_FN_ATTRS @@ -472,7 +472,7 @@ _mm256_mul_epu32(__m256i __a, __m256i __ static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_or_si256(__m256i __a, __m256i __b) { - return __a | __b; + return (__m256i)((__v4di)__a | (__v4di)__b); } static __inline__ __m256i __DEFAULT_FN_ATTRS @@ -573,13 +573,13 @@ _mm256_sll_epi32(__m256i __a, __m128i __ static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_slli_epi64(__m256i __a, int __count) { - return __builtin_ia32_psllqi256(__a, __count); + return __builtin_ia32_psllqi256((__v4di)__a, __count); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_sll_epi64(__m256i __a, __m128i __count) { - return __builtin_ia32_psllq256(__a, __count); + return __builtin_ia32_psllq256((__v4di)__a, __count); } static __inline__ __m256i __DEFAULT_FN_ATTRS @@ -638,13 +638,13 @@ _mm256_srl_epi32(__m256i __a, __m128i __ static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_srli_epi64(__m256i __a, int __count) { - return __builtin_ia32_psrlqi256(__a, __count); + return __builtin_ia32_psrlqi256((__v4di)__a, __count); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_srl_epi64(__m256i __a, __m128i __count) { - return __builtin_ia32_psrlq256(__a, __count); + return __builtin_ia32_psrlq256((__v4di)__a, __count); } static __inline__ __m256i __DEFAULT_FN_ATTRS @@ -668,7 +668,7 @@ _mm256_sub_epi32(__m256i __a, __m256i __ static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_sub_epi64(__m256i __a, __m256i __b) { - return __a - __b; + return (__m256i)((__v4di)__a - (__v4di)__b); } static __inline__ __m256i __DEFAULT_FN_ATTRS @@ -716,7 +716,7 @@ _mm256_unpackhi_epi32(__m256i __a, __m25 static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_unpackhi_epi64(__m256i __a, __m256i __b) { - return (__m256i)__builtin_shufflevector(__a, __b, 1, 4+1, 3, 4+3); + return (__m256i)__builtin_shufflevector((__v4di)__a, (__v4di)__b, 1, 4+1, 3, 4+3); } static __inline__ __m256i __DEFAULT_FN_ATTRS @@ -740,13 +740,13 @@ _mm256_unpacklo_epi32(__m256i __a, __m25 static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_unpacklo_epi64(__m256i __a, __m256i __b) { - return (__m256i)__builtin_shufflevector(__a, __b, 0, 4+0, 2, 4+2); + return (__m256i)__builtin_shufflevector((__v4di)__a, (__v4di)__b, 0, 4+0, 2, 4+2); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_xor_si256(__m256i __a, __m256i __b) { - return __a ^ __b; + return (__m256i)((__v4di)__a ^ (__v4di)__b); } static __inline__ __m256i __DEFAULT_FN_ATTRS @@ -764,7 +764,7 @@ _mm_broadcastss_ps(__m128 __X) static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_broadcastsd_pd(__m128d __a) { - return __builtin_shufflevector(__a, __a, 0, 0); + return __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 0); } static __inline__ __m256 __DEFAULT_FN_ATTRS @@ -782,7 +782,7 @@ _mm256_broadcastsd_pd(__m128d __X) static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_broadcastsi128_si256(__m128i __X) { - return (__m256i)__builtin_shufflevector(__X, __X, 0, 1, 0, 1); + return (__m256i)__builtin_shufflevector((__v2di)__X, (__v2di)__X, 0, 1, 0, 1); } #define _mm_blend_epi32(V1, V2, M) __extension__ ({ \ @@ 
-826,7 +826,7 @@ _mm256_broadcastd_epi32(__m128i __X) static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_broadcastq_epi64(__m128i __X) { - return (__m256i)__builtin_shufflevector(__X, __X, 0, 0, 0, 0); + return (__m256i)__builtin_shufflevector((__v2di)__X, (__v2di)__X, 0, 0, 0, 0); } static __inline__ __m128i __DEFAULT_FN_ATTRS @@ -851,7 +851,7 @@ _mm_broadcastd_epi32(__m128i __X) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_broadcastq_epi64(__m128i __X) { - return (__m128i)__builtin_shufflevector(__X, __X, 0, 0); + return (__m128i)__builtin_shufflevector((__v2di)__X, (__v2di)__X, 0, 0); } static __inline__ __m256i __DEFAULT_FN_ATTRS @@ -904,7 +904,7 @@ _mm256_maskload_epi32(int const *__X, __ static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskload_epi64(long long const *__X, __m256i __M) { - return (__m256i)__builtin_ia32_maskloadq256((const __v4di *)__X, __M); + return (__m256i)__builtin_ia32_maskloadq256((const __v4di *)__X, (__v4di)__M); } static __inline__ __m128i __DEFAULT_FN_ATTRS @@ -928,7 +928,7 @@ _mm256_maskstore_epi32(int *__X, __m256i static __inline__ void __DEFAULT_FN_ATTRS _mm256_maskstore_epi64(long long *__X, __m256i __M, __m256i __Y) { - __builtin_ia32_maskstoreq256((__v4di *)__X, __M, __Y); + __builtin_ia32_maskstoreq256((__v4di *)__X, (__v4di)__M, (__v4di)__Y); } static __inline__ void __DEFAULT_FN_ATTRS @@ -940,7 +940,7 @@ _mm_maskstore_epi32(int *__X, __m128i __ static __inline__ void __DEFAULT_FN_ATTRS _mm_maskstore_epi64(long long *__X, __m128i __M, __m128i __Y) { - __builtin_ia32_maskstoreq(( __v2di *)__X, __M, __Y); + __builtin_ia32_maskstoreq(( __v2di *)__X, (__v2di)__M, (__v2di)__Y); } static __inline__ __m256i __DEFAULT_FN_ATTRS @@ -958,13 +958,13 @@ _mm_sllv_epi32(__m128i __X, __m128i __Y) static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_sllv_epi64(__m256i __X, __m256i __Y) { - return (__m256i)__builtin_ia32_psllv4di(__X, __Y); + return (__m256i)__builtin_ia32_psllv4di((__v4di)__X, (__v4di)__Y); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sllv_epi64(__m128i __X, __m128i __Y) { - return (__m128i)__builtin_ia32_psllv2di(__X, __Y); + return (__m128i)__builtin_ia32_psllv2di((__v2di)__X, (__v2di)__Y); } static __inline__ __m256i __DEFAULT_FN_ATTRS @@ -994,13 +994,13 @@ _mm_srlv_epi32(__m128i __X, __m128i __Y) static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_srlv_epi64(__m256i __X, __m256i __Y) { - return (__m256i)__builtin_ia32_psrlv4di(__X, __Y); + return (__m256i)__builtin_ia32_psrlv4di((__v4di)__X, (__v4di)__Y); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srlv_epi64(__m128i __X, __m128i __Y) { - return (__m128i)__builtin_ia32_psrlv2di(__X, __Y); + return (__m128i)__builtin_ia32_psrlv2di((__v2di)__X, (__v2di)__Y); } #define _mm_mask_i32gather_pd(a, m, i, mask, s) __extension__ ({ \ Modified: cfe/trunk/lib/Headers/avxintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avxintrin.h?rev=269632&r1=269631&r2=269632&view=diff ============================================================================== --- cfe/trunk/lib/Headers/avxintrin.h (original) +++ cfe/trunk/lib/Headers/avxintrin.h Mon May 16 01:38:42 2016 @@ -62,7 +62,7 @@ typedef long long __m256i __attribute__( static __inline __m256d __DEFAULT_FN_ATTRS _mm256_add_pd(__m256d __a, __m256d __b) { - return __a+__b; + return (__m256d)((__v4df)__a+(__v4df)__b); } /// \brief Adds two 256-bit vectors of [8 x float]. 
@@ -80,7 +80,7 @@ _mm256_add_pd(__m256d __a, __m256d __b) static __inline __m256 __DEFAULT_FN_ATTRS _mm256_add_ps(__m256 __a, __m256 __b) { - return __a+__b; + return (__m256)((__v8sf)__a+(__v8sf)__b); } /// \brief Subtracts two 256-bit vectors of [4 x double]. @@ -98,7 +98,7 @@ _mm256_add_ps(__m256 __a, __m256 __b) static __inline __m256d __DEFAULT_FN_ATTRS _mm256_sub_pd(__m256d __a, __m256d __b) { - return __a-__b; + return (__m256d)((__v4df)__a-(__v4df)__b); } /// \brief Subtracts two 256-bit vectors of [8 x float]. @@ -116,7 +116,7 @@ _mm256_sub_pd(__m256d __a, __m256d __b) static __inline __m256 __DEFAULT_FN_ATTRS _mm256_sub_ps(__m256 __a, __m256 __b) { - return __a-__b; + return (__m256)((__v8sf)__a-(__v8sf)__b); } /// \brief Adds the even-indexed values and subtracts the odd-indexed values of @@ -172,7 +172,7 @@ _mm256_addsub_ps(__m256 __a, __m256 __b) static __inline __m256d __DEFAULT_FN_ATTRS _mm256_div_pd(__m256d __a, __m256d __b) { - return __a / __b; + return (__m256d)((__v4df)__a/(__v4df)__b); } /// \brief Divides two 256-bit vectors of [8 x float]. @@ -190,7 +190,7 @@ _mm256_div_pd(__m256d __a, __m256d __b) static __inline __m256 __DEFAULT_FN_ATTRS _mm256_div_ps(__m256 __a, __m256 __b) { - return __a / __b; + return (__m256)((__v8sf)__a/(__v8sf)__b); } /// \brief Compares two 256-bit vectors of [4 x double] and returns the greater @@ -284,7 +284,7 @@ _mm256_min_ps(__m256 __a, __m256 __b) static __inline __m256d __DEFAULT_FN_ATTRS _mm256_mul_pd(__m256d __a, __m256d __b) { - return __a * __b; + return (__m256d)((__v4df)__a * (__v4df)__b); } /// \brief Multiplies two 256-bit vectors of [8 x float]. @@ -302,7 +302,7 @@ _mm256_mul_pd(__m256d __a, __m256d __b) static __inline __m256 __DEFAULT_FN_ATTRS _mm256_mul_ps(__m256 __a, __m256 __b) { - return __a * __b; + return (__m256)((__v8sf)__a * (__v8sf)__b); } /// \brief Calculates the square roots of the values stored in a 256-bit vector @@ -1443,44 +1443,44 @@ _mm256_cvttps_epi32(__m256 __a) static __inline __m256 __DEFAULT_FN_ATTRS _mm256_movehdup_ps(__m256 __a) { - return __builtin_shufflevector(__a, __a, 1, 1, 3, 3, 5, 5, 7, 7); + return __builtin_shufflevector((__v8sf)__a, (__v8sf)__a, 1, 1, 3, 3, 5, 5, 7, 7); } static __inline __m256 __DEFAULT_FN_ATTRS _mm256_moveldup_ps(__m256 __a) { - return __builtin_shufflevector(__a, __a, 0, 0, 2, 2, 4, 4, 6, 6); + return __builtin_shufflevector((__v8sf)__a, (__v8sf)__a, 0, 0, 2, 2, 4, 4, 6, 6); } static __inline __m256d __DEFAULT_FN_ATTRS _mm256_movedup_pd(__m256d __a) { - return __builtin_shufflevector(__a, __a, 0, 0, 2, 2); + return __builtin_shufflevector((__v4df)__a, (__v4df)__a, 0, 0, 2, 2); } /* Unpack and Interleave */ static __inline __m256d __DEFAULT_FN_ATTRS _mm256_unpackhi_pd(__m256d __a, __m256d __b) { - return __builtin_shufflevector(__a, __b, 1, 5, 1+2, 5+2); + return __builtin_shufflevector((__v4df)__a, (__v4df)__b, 1, 5, 1+2, 5+2); } static __inline __m256d __DEFAULT_FN_ATTRS _mm256_unpacklo_pd(__m256d __a, __m256d __b) { - return __builtin_shufflevector(__a, __b, 0, 4, 0+2, 4+2); + return __builtin_shufflevector((__v4df)__a, (__v4df)__b, 0, 4, 0+2, 4+2); } static __inline __m256 __DEFAULT_FN_ATTRS _mm256_unpackhi_ps(__m256 __a, __m256 __b) { - return __builtin_shufflevector(__a, __b, 2, 10, 2+1, 10+1, 6, 14, 6+1, 14+1); + return __builtin_shufflevector((__v8sf)__a, (__v8sf)__b, 2, 10, 2+1, 10+1, 6, 14, 6+1, 14+1); } static __inline __m256 __DEFAULT_FN_ATTRS _mm256_unpacklo_ps(__m256 __a, __m256 __b) { - return __builtin_shufflevector(__a, __b, 0, 8, 0+1, 8+1, 4, 12, 4+1, 
12+1); + return __builtin_shufflevector((__v8sf)__a, (__v8sf)__b, 0, 8, 0+1, 8+1, 4, 12, 4+1, 12+1); } /* Bit Test */ @@ -1625,13 +1625,13 @@ _mm256_broadcast_ss(float const *__a) static __inline __m256d __DEFAULT_FN_ATTRS _mm256_broadcast_pd(__m128d const *__a) { - return (__m256d)__builtin_ia32_vbroadcastf128_pd256(__a); + return (__m256d)__builtin_ia32_vbroadcastf128_pd256((__v2df const *)__a); } static __inline __m256 __DEFAULT_FN_ATTRS _mm256_broadcast_ps(__m128 const *__a) { - return (__m256)__builtin_ia32_vbroadcastf128_ps256(__a); + return (__m256)__builtin_ia32_vbroadcastf128_ps256((__v4sf const *)__a); } /* SIMD load ops */ @@ -2019,37 +2019,37 @@ _mm256_castsi256_pd(__m256i __a) static __inline __m128d __DEFAULT_FN_ATTRS _mm256_castpd256_pd128(__m256d __a) { - return __builtin_shufflevector(__a, __a, 0, 1); + return __builtin_shufflevector((__v4df)__a, (__v4df)__a, 0, 1); } static __inline __m128 __DEFAULT_FN_ATTRS _mm256_castps256_ps128(__m256 __a) { - return __builtin_shufflevector(__a, __a, 0, 1, 2, 3); + return __builtin_shufflevector((__v8sf)__a, (__v8sf)__a, 0, 1, 2, 3); } static __inline __m128i __DEFAULT_FN_ATTRS _mm256_castsi256_si128(__m256i __a) { - return __builtin_shufflevector(__a, __a, 0, 1); + return __builtin_shufflevector((__v4di)__a, (__v4di)__a, 0, 1); } static __inline __m256d __DEFAULT_FN_ATTRS _mm256_castpd128_pd256(__m128d __a) { - return __builtin_shufflevector(__a, __a, 0, 1, -1, -1); + return __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 1, -1, -1); } static __inline __m256 __DEFAULT_FN_ATTRS _mm256_castps128_ps256(__m128 __a) { - return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1); + return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 1, 2, 3, -1, -1, -1, -1); } static __inline __m256i __DEFAULT_FN_ATTRS _mm256_castsi128_si256(__m128i __a) { - return __builtin_shufflevector(__a, __a, 0, 1, -1, -1); + return __builtin_shufflevector((__v2di)__a, (__v2di)__a, 0, 1, -1, -1); } /* @@ -2187,7 +2187,7 @@ _mm256_storeu2_m128i(__m128i *__addr_hi, static __inline __m256 __DEFAULT_FN_ATTRS _mm256_set_m128 (__m128 __hi, __m128 __lo) { - return (__m256) __builtin_shufflevector(__lo, __hi, 0, 1, 2, 3, 4, 5, 6, 7); + return (__m256) __builtin_shufflevector((__v4sf)__lo, (__v4sf)__hi, 0, 1, 2, 3, 4, 5, 6, 7); } static __inline __m256d __DEFAULT_FN_ATTRS Modified: cfe/trunk/lib/Headers/emmintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/emmintrin.h?rev=269632&r1=269631&r2=269632&view=diff ============================================================================== --- cfe/trunk/lib/Headers/emmintrin.h (original) +++ cfe/trunk/lib/Headers/emmintrin.h Mon May 16 01:38:42 2016 @@ -54,7 +54,7 @@ _mm_add_sd(__m128d __a, __m128d __b) static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_add_pd(__m128d __a, __m128d __b) { - return __a + __b; + return (__m128d)((__v2df)__a + (__v2df)__b); } static __inline__ __m128d __DEFAULT_FN_ATTRS @@ -67,7 +67,7 @@ _mm_sub_sd(__m128d __a, __m128d __b) static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sub_pd(__m128d __a, __m128d __b) { - return __a - __b; + return (__m128d)((__v2df)__a - (__v2df)__b); } static __inline__ __m128d __DEFAULT_FN_ATTRS @@ -80,7 +80,7 @@ _mm_mul_sd(__m128d __a, __m128d __b) static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mul_pd(__m128d __a, __m128d __b) { - return __a * __b; + return (__m128d)((__v2df)__a * (__v2df)__b); } static __inline__ __m128d __DEFAULT_FN_ATTRS @@ -93,44 +93,44 @@ _mm_div_sd(__m128d __a, __m128d __b) static __inline__ __m128d 
__DEFAULT_FN_ATTRS _mm_div_pd(__m128d __a, __m128d __b) { - return __a / __b; + return (__m128d)((__v2df)__a / (__v2df)__b); } static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sqrt_sd(__m128d __a, __m128d __b) { - __m128d __c = __builtin_ia32_sqrtsd(__b); + __m128d __c = __builtin_ia32_sqrtsd((__v2df)__b); return (__m128d) { __c[0], __a[1] }; } static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sqrt_pd(__m128d __a) { - return __builtin_ia32_sqrtpd(__a); + return __builtin_ia32_sqrtpd((__v2df)__a); } static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_min_sd(__m128d __a, __m128d __b) { - return __builtin_ia32_minsd(__a, __b); + return __builtin_ia32_minsd((__v2df)__a, (__v2df)__b); } static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_min_pd(__m128d __a, __m128d __b) { - return __builtin_ia32_minpd(__a, __b); + return __builtin_ia32_minpd((__v2df)__a, (__v2df)__b); } static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_max_sd(__m128d __a, __m128d __b) { - return __builtin_ia32_maxsd(__a, __b); + return __builtin_ia32_maxsd((__v2df)__a, (__v2df)__b); } static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_max_pd(__m128d __a, __m128d __b) { - return __builtin_ia32_maxpd(__a, __b); + return __builtin_ia32_maxpd((__v2df)__a, (__v2df)__b); } static __inline__ __m128d __DEFAULT_FN_ATTRS @@ -160,233 +160,233 @@ _mm_xor_pd(__m128d __a, __m128d __b) static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpeq_pd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmpeqpd(__a, __b); + return (__m128d)__builtin_ia32_cmpeqpd((__v2df)__a, (__v2df)__b); } static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmplt_pd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmpltpd(__a, __b); + return (__m128d)__builtin_ia32_cmpltpd((__v2df)__a, (__v2df)__b); } static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmple_pd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmplepd(__a, __b); + return (__m128d)__builtin_ia32_cmplepd((__v2df)__a, (__v2df)__b); } static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpgt_pd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmpltpd(__b, __a); + return (__m128d)__builtin_ia32_cmpltpd((__v2df)__b, (__v2df)__a); } static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpge_pd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmplepd(__b, __a); + return (__m128d)__builtin_ia32_cmplepd((__v2df)__b, (__v2df)__a); } static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpord_pd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmpordpd(__a, __b); + return (__m128d)__builtin_ia32_cmpordpd((__v2df)__a, (__v2df)__b); } static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpunord_pd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmpunordpd(__a, __b); + return (__m128d)__builtin_ia32_cmpunordpd((__v2df)__a, (__v2df)__b); } static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpneq_pd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmpneqpd(__a, __b); + return (__m128d)__builtin_ia32_cmpneqpd((__v2df)__a, (__v2df)__b); } static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnlt_pd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmpnltpd(__a, __b); + return (__m128d)__builtin_ia32_cmpnltpd((__v2df)__a, (__v2df)__b); } static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnle_pd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmpnlepd(__a, __b); + return (__m128d)__builtin_ia32_cmpnlepd((__v2df)__a, (__v2df)__b); } static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpngt_pd(__m128d __a, __m128d __b) { - return 
(__m128d)__builtin_ia32_cmpnltpd(__b, __a); + return (__m128d)__builtin_ia32_cmpnltpd((__v2df)__b, (__v2df)__a); } static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnge_pd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmpnlepd(__b, __a); + return (__m128d)__builtin_ia32_cmpnlepd((__v2df)__b, (__v2df)__a); } static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpeq_sd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmpeqsd(__a, __b); + return (__m128d)__builtin_ia32_cmpeqsd((__v2df)__a, (__v2df)__b); } static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmplt_sd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmpltsd(__a, __b); + return (__m128d)__builtin_ia32_cmpltsd((__v2df)__a, (__v2df)__b); } static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmple_sd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmplesd(__a, __b); + return (__m128d)__builtin_ia32_cmplesd((__v2df)__a, (__v2df)__b); } static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpgt_sd(__m128d __a, __m128d __b) { - __m128d __c = __builtin_ia32_cmpltsd(__b, __a); + __m128d __c = __builtin_ia32_cmpltsd((__v2df)__b, (__v2df)__a); return (__m128d) { __c[0], __a[1] }; } static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpge_sd(__m128d __a, __m128d __b) { - __m128d __c = __builtin_ia32_cmplesd(__b, __a); + __m128d __c = __builtin_ia32_cmplesd((__v2df)__b, (__v2df)__a); return (__m128d) { __c[0], __a[1] }; } static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpord_sd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmpordsd(__a, __b); + return (__m128d)__builtin_ia32_cmpordsd((__v2df)__a, (__v2df)__b); } static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpunord_sd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmpunordsd(__a, __b); + return (__m128d)__builtin_ia32_cmpunordsd((__v2df)__a, (__v2df)__b); } static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpneq_sd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmpneqsd(__a, __b); + return (__m128d)__builtin_ia32_cmpneqsd((__v2df)__a, (__v2df)__b); } static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnlt_sd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmpnltsd(__a, __b); + return (__m128d)__builtin_ia32_cmpnltsd((__v2df)__a, (__v2df)__b); } static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnle_sd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmpnlesd(__a, __b); + return (__m128d)__builtin_ia32_cmpnlesd((__v2df)__a, (__v2df)__b); } static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpngt_sd(__m128d __a, __m128d __b) { - __m128d __c = __builtin_ia32_cmpnltsd(__b, __a); + __m128d __c = __builtin_ia32_cmpnltsd((__v2df)__b, (__v2df)__a); return (__m128d) { __c[0], __a[1] }; } static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnge_sd(__m128d __a, __m128d __b) { - __m128d __c = __builtin_ia32_cmpnlesd(__b, __a); + __m128d __c = __builtin_ia32_cmpnlesd((__v2df)__b, (__v2df)__a); return (__m128d) { __c[0], __a[1] }; } static __inline__ int __DEFAULT_FN_ATTRS _mm_comieq_sd(__m128d __a, __m128d __b) { - return __builtin_ia32_comisdeq(__a, __b); + return __builtin_ia32_comisdeq((__v2df)__a, (__v2df)__b); } static __inline__ int __DEFAULT_FN_ATTRS _mm_comilt_sd(__m128d __a, __m128d __b) { - return __builtin_ia32_comisdlt(__a, __b); + return __builtin_ia32_comisdlt((__v2df)__a, (__v2df)__b); } static __inline__ int __DEFAULT_FN_ATTRS _mm_comile_sd(__m128d __a, __m128d __b) { - return __builtin_ia32_comisdle(__a, __b); + return __builtin_ia32_comisdle((__v2df)__a, (__v2df)__b); } static 
__inline__ int __DEFAULT_FN_ATTRS _mm_comigt_sd(__m128d __a, __m128d __b) { - return __builtin_ia32_comisdgt(__a, __b); + return __builtin_ia32_comisdgt((__v2df)__a, (__v2df)__b); } static __inline__ int __DEFAULT_FN_ATTRS _mm_comige_sd(__m128d __a, __m128d __b) { - return __builtin_ia32_comisdge(__a, __b); + return __builtin_ia32_comisdge((__v2df)__a, (__v2df)__b); } static __inline__ int __DEFAULT_FN_ATTRS _mm_comineq_sd(__m128d __a, __m128d __b) { - return __builtin_ia32_comisdneq(__a, __b); + return __builtin_ia32_comisdneq((__v2df)__a, (__v2df)__b); } static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomieq_sd(__m128d __a, __m128d __b) { - return __builtin_ia32_ucomisdeq(__a, __b); + return __builtin_ia32_ucomisdeq((__v2df)__a, (__v2df)__b); } static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomilt_sd(__m128d __a, __m128d __b) { - return __builtin_ia32_ucomisdlt(__a, __b); + return __builtin_ia32_ucomisdlt((__v2df)__a, (__v2df)__b); } static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomile_sd(__m128d __a, __m128d __b) { - return __builtin_ia32_ucomisdle(__a, __b); + return __builtin_ia32_ucomisdle((__v2df)__a, (__v2df)__b); } static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomigt_sd(__m128d __a, __m128d __b) { - return __builtin_ia32_ucomisdgt(__a, __b); + return __builtin_ia32_ucomisdgt((__v2df)__a, (__v2df)__b); } static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomige_sd(__m128d __a, __m128d __b) { - return __builtin_ia32_ucomisdge(__a, __b); + return __builtin_ia32_ucomisdge((__v2df)__a, (__v2df)__b); } static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomineq_sd(__m128d __a, __m128d __b) { - return __builtin_ia32_ucomisdneq(__a, __b); + return __builtin_ia32_ucomisdneq((__v2df)__a, (__v2df)__b); } static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtpd_ps(__m128d __a) { - return __builtin_ia32_cvtpd2ps(__a); + return __builtin_ia32_cvtpd2ps((__v2df)__a); } static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtps_pd(__m128 __a) { - return __builtin_ia32_cvtps2pd(__a); + return __builtin_ia32_cvtps2pd((__v4sf)__a); } static __inline__ __m128d __DEFAULT_FN_ATTRS @@ -398,13 +398,13 @@ _mm_cvtepi32_pd(__m128i __a) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtpd_epi32(__m128d __a) { - return __builtin_ia32_cvtpd2dq(__a); + return __builtin_ia32_cvtpd2dq((__v2df)__a); } static __inline__ int __DEFAULT_FN_ATTRS _mm_cvtsd_si32(__m128d __a) { - return __builtin_ia32_cvtsd2si(__a); + return __builtin_ia32_cvtsd2si((__v2df)__a); } static __inline__ __m128 __DEFAULT_FN_ATTRS @@ -431,7 +431,7 @@ _mm_cvtss_sd(__m128d __a, __m128 __b) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvttpd_epi32(__m128d __a) { - return (__m128i)__builtin_ia32_cvttpd2dq(__a); + return (__m128i)__builtin_ia32_cvttpd2dq((__v2df)__a); } static __inline__ int __DEFAULT_FN_ATTRS @@ -443,13 +443,13 @@ _mm_cvttsd_si32(__m128d __a) static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cvtpd_pi32(__m128d __a) { - return (__m64)__builtin_ia32_cvtpd2pi(__a); + return (__m64)__builtin_ia32_cvtpd2pi((__v2df)__a); } static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cvttpd_pi32(__m128d __a) { - return (__m64)__builtin_ia32_cvttpd2pi(__a); + return (__m64)__builtin_ia32_cvttpd2pi((__v2df)__a); } static __inline__ __m128d __DEFAULT_FN_ATTRS @@ -486,7 +486,7 @@ static __inline__ __m128d __DEFAULT_FN_A _mm_loadr_pd(double const *__dp) { __m128d __u = *(__m128d*)__dp; - return __builtin_shufflevector(__u, __u, 1, 0); + return __builtin_shufflevector((__v2df)__u, (__v2df)__u, 1, 0); } static __inline__ __m128d __DEFAULT_FN_ATTRS @@ -598,13 +598,13 @@ 
_mm_store_pd(double *__dp, __m128d __a) static __inline__ void __DEFAULT_FN_ATTRS _mm_storeu_pd(double *__dp, __m128d __a) { - __builtin_ia32_storeupd(__dp, __a); + __builtin_ia32_storeupd(__dp, (__v2df)__a); } static __inline__ void __DEFAULT_FN_ATTRS _mm_storer_pd(double *__dp, __m128d __a) { - __a = __builtin_shufflevector(__a, __a, 1, 0); + __a = __builtin_shufflevector((__v2df)__a, (__v2df)__a, 1, 0); *(__m128d *)__dp = __a; } @@ -647,13 +647,13 @@ _mm_add_epi32(__m128i __a, __m128i __b) static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_add_si64(__m64 __a, __m64 __b) { - return (__m64)__builtin_ia32_paddq(__a, __b); + return (__m64)__builtin_ia32_paddq((__v1di)__a, (__v1di)__b); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi64(__m128i __a, __m128i __b) { - return __a + __b; + return (__m128i)((__v2di)__a + (__v2di)__b); } static __inline__ __m128i __DEFAULT_FN_ATTRS @@ -883,7 +883,7 @@ _mm_sub_epi32(__m128i __a, __m128i __b) static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sub_si64(__m64 __a, __m64 __b) { - return (__m64)__builtin_ia32_psubq(__a, __b); + return (__m64)__builtin_ia32_psubq((__v1di)__a, (__v1di)__b); } /// \brief Subtracts the corresponding elements of two [2 x i64] vectors. @@ -901,7 +901,7 @@ _mm_sub_si64(__m64 __a, __m64 __b) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi64(__m128i __a, __m128i __b) { - return __a - __b; + return (__m128i)((__v2di)__a - (__v2di)__b); } /// \brief Subtracts corresponding 8-bit signed integer values in the input and @@ -1001,7 +1001,7 @@ _mm_subs_epu16(__m128i __a, __m128i __b) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_and_si128(__m128i __a, __m128i __b) { - return __a & __b; + return (__m128i)((__v2di)__a & (__v2di)__b); } /// \brief Performs a bitwise AND of two 128-bit integer vectors, using the @@ -1021,7 +1021,7 @@ _mm_and_si128(__m128i __a, __m128i __b) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_andnot_si128(__m128i __a, __m128i __b) { - return ~__a & __b; + return (__m128i)(~(__v2di)__a & (__v2di)__b); } /// \brief Performs a bitwise OR of two 128-bit integer vectors. /// @@ -1038,7 +1038,7 @@ _mm_andnot_si128(__m128i __a, __m128i __ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_or_si128(__m128i __a, __m128i __b) { - return __a | __b; + return (__m128i)((__v2di)__a | (__v2di)__b); } /// \brief Performs a bitwise exclusive OR of two 128-bit integer vectors. 
@@ -1056,7 +1056,7 @@ _mm_or_si128(__m128i __a, __m128i __b) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_xor_si128(__m128i __a, __m128i __b) { - return __a ^ __b; + return (__m128i)((__v2di)__a ^ (__v2di)__b); } /// \brief Left-shifts the 128-bit integer vector operand by the specified @@ -1191,7 +1191,7 @@ _mm_sll_epi32(__m128i __a, __m128i __cou static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_slli_epi64(__m128i __a, int __count) { - return __builtin_ia32_psllqi128(__a, __count); + return __builtin_ia32_psllqi128((__v2di)__a, __count); } /// \brief Left-shifts each 64-bit value in the 128-bit integer vector operand @@ -1210,7 +1210,7 @@ _mm_slli_epi64(__m128i __a, int __count) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi64(__m128i __a, __m128i __count) { - return __builtin_ia32_psllq128(__a, __count); + return __builtin_ia32_psllq128((__v2di)__a, (__v2di)__count); } /// \brief Right-shifts each 16-bit value in the 128-bit integer vector operand @@ -1425,7 +1425,7 @@ _mm_srl_epi32(__m128i __a, __m128i __cou static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srli_epi64(__m128i __a, int __count) { - return __builtin_ia32_psrlqi128(__a, __count); + return __builtin_ia32_psrlqi128((__v2di)__a, __count); } /// \brief Right-shifts each of 64-bit values in the 128-bit integer vector @@ -1444,7 +1444,7 @@ _mm_srli_epi64(__m128i __a, int __count) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi64(__m128i __a, __m128i __count) { - return __builtin_ia32_psrlq128(__a, __count); + return __builtin_ia32_psrlq128((__v2di)__a, (__v2di)__count); } /// \brief Compares each of the corresponding 8-bit values of the 128-bit @@ -1665,7 +1665,7 @@ _mm_cvtsi64_sd(__m128d __a, long long __ static __inline__ long long __DEFAULT_FN_ATTRS _mm_cvtsd_si64(__m128d __a) { - return __builtin_ia32_cvtsd2si64(__a); + return __builtin_ia32_cvtsd2si64((__v2df)__a); } /// \brief Converts the first (lower) element of a vector of [2 x double] into a @@ -1714,7 +1714,7 @@ _mm_cvtepi32_ps(__m128i __a) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtps_epi32(__m128 __a) { - return (__m128i)__builtin_ia32_cvtps2dq(__a); + return (__m128i)__builtin_ia32_cvtps2dq((__v4sf)__a); } /// \brief Converts a vector of [4 x float] into a vector of [4 x i32], @@ -1730,7 +1730,7 @@ _mm_cvtps_epi32(__m128 __a) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvttps_epi32(__m128 __a) { - return (__m128i)__builtin_ia32_cvttps2dq(__a); + return (__m128i)__builtin_ia32_cvttps2dq((__v4sf)__a); } /// \brief Returns a vector of [4 x i32] where the lowest element is the input @@ -2190,13 +2190,13 @@ _mm_storel_epi64(__m128i *__p, __m128i _ static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_pd(double *__p, __m128d __a) { - __builtin_ia32_movntpd(__p, __a); + __builtin_ia32_movntpd(__p, (__v2df)__a); } static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_si128(__m128i *__p, __m128i __a) { - __builtin_ia32_movntdq(__p, __a); + __builtin_ia32_movntdq(__p, (__v2di)__a); } static __inline__ void __DEFAULT_FN_ATTRS @@ -2313,7 +2313,7 @@ _mm_unpackhi_epi32(__m128i __a, __m128i static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi64(__m128i __a, __m128i __b) { - return (__m128i)__builtin_shufflevector(__a, __b, 1, 2+1); + return (__m128i)__builtin_shufflevector((__v2di)__a, (__v2di)__b, 1, 2+1); } static __inline__ __m128i __DEFAULT_FN_ATTRS @@ -2337,7 +2337,7 @@ _mm_unpacklo_epi32(__m128i __a, __m128i static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi64(__m128i __a, __m128i __b) { - return 
(__m128i)__builtin_shufflevector(__a, __b, 0, 2+0); + return (__m128i)__builtin_shufflevector((__v2di)__a, (__v2di)__b, 0, 2+0); } static __inline__ __m64 __DEFAULT_FN_ATTRS @@ -2355,25 +2355,25 @@ _mm_movpi64_epi64(__m64 __a) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_move_epi64(__m128i __a) { - return __builtin_shufflevector(__a, (__m128i){ 0 }, 0, 2); + return __builtin_shufflevector((__v2di)__a, (__m128i){ 0 }, 0, 2); } static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_unpackhi_pd(__m128d __a, __m128d __b) { - return __builtin_shufflevector(__a, __b, 1, 2+1); + return __builtin_shufflevector((__v2df)__a, (__v2df)__b, 1, 2+1); } static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_unpacklo_pd(__m128d __a, __m128d __b) { - return __builtin_shufflevector(__a, __b, 0, 2+0); + return __builtin_shufflevector((__v2df)__a, (__v2df)__b, 0, 2+0); } static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_pd(__m128d __a) { - return __builtin_ia32_movmskpd(__a); + return __builtin_ia32_movmskpd((__v2df)__a); } #define _mm_shuffle_pd(a, b, i) __extension__ ({ \ Modified: cfe/trunk/lib/Headers/fma4intrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/fma4intrin.h?rev=269632&r1=269631&r2=269632&view=diff ============================================================================== --- cfe/trunk/lib/Headers/fma4intrin.h (original) +++ cfe/trunk/lib/Headers/fma4intrin.h Mon May 16 01:38:42 2016 @@ -36,193 +36,193 @@ static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_macc_ps(__m128 __A, __m128 __B, __m128 __C) { - return (__m128)__builtin_ia32_vfmaddps(__A, __B, __C); + return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); } static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_macc_pd(__m128d __A, __m128d __B, __m128d __C) { - return (__m128d)__builtin_ia32_vfmaddpd(__A, __B, __C); + return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C); } static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_macc_ss(__m128 __A, __m128 __B, __m128 __C) { - return (__m128)__builtin_ia32_vfmaddss(__A, __B, __C); + return (__m128)__builtin_ia32_vfmaddss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); } static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_macc_sd(__m128d __A, __m128d __B, __m128d __C) { - return (__m128d)__builtin_ia32_vfmaddsd(__A, __B, __C); + return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B, (__v2df)__C); } static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_msub_ps(__m128 __A, __m128 __B, __m128 __C) { - return (__m128)__builtin_ia32_vfmsubps(__A, __B, __C); + return (__m128)__builtin_ia32_vfmsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); } static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_msub_pd(__m128d __A, __m128d __B, __m128d __C) { - return (__m128d)__builtin_ia32_vfmsubpd(__A, __B, __C); + return (__m128d)__builtin_ia32_vfmsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C); } static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_msub_ss(__m128 __A, __m128 __B, __m128 __C) { - return (__m128)__builtin_ia32_vfmsubss(__A, __B, __C); + return (__m128)__builtin_ia32_vfmsubss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); } static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_msub_sd(__m128d __A, __m128d __B, __m128d __C) { - return (__m128d)__builtin_ia32_vfmsubsd(__A, __B, __C); + return (__m128d)__builtin_ia32_vfmsubsd((__v2df)__A, (__v2df)__B, (__v2df)__C); } static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_nmacc_ps(__m128 __A, __m128 __B, __m128 __C) { - return (__m128)__builtin_ia32_vfnmaddps(__A, __B, __C); + return (__m128)__builtin_ia32_vfnmaddps((__v4sf)__A, 
(__v4sf)__B, (__v4sf)__C); } static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_nmacc_pd(__m128d __A, __m128d __B, __m128d __C) { - return (__m128d)__builtin_ia32_vfnmaddpd(__A, __B, __C); + return (__m128d)__builtin_ia32_vfnmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C); } static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_nmacc_ss(__m128 __A, __m128 __B, __m128 __C) { - return (__m128)__builtin_ia32_vfnmaddss(__A, __B, __C); + return (__m128)__builtin_ia32_vfnmaddss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); } static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_nmacc_sd(__m128d __A, __m128d __B, __m128d __C) { - return (__m128d)__builtin_ia32_vfnmaddsd(__A, __B, __C); + return (__m128d)__builtin_ia32_vfnmaddsd((__v2df)__A, (__v2df)__B, (__v2df)__C); } static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_nmsub_ps(__m128 __A, __m128 __B, __m128 __C) { - return (__m128)__builtin_ia32_vfnmsubps(__A, __B, __C); + return (__m128)__builtin_ia32_vfnmsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); } static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_nmsub_pd(__m128d __A, __m128d __B, __m128d __C) { - return (__m128d)__builtin_ia32_vfnmsubpd(__A, __B, __C); + return (__m128d)__builtin_ia32_vfnmsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C); } static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_nmsub_ss(__m128 __A, __m128 __B, __m128 __C) { - return (__m128)__builtin_ia32_vfnmsubss(__A, __B, __C); + return (__m128)__builtin_ia32_vfnmsubss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); } static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_nmsub_sd(__m128d __A, __m128d __B, __m128d __C) { - return (__m128d)__builtin_ia32_vfnmsubsd(__A, __B, __C); + return (__m128d)__builtin_ia32_vfnmsubsd((__v2df)__A, (__v2df)__B, (__v2df)__C); } static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maddsub_ps(__m128 __A, __m128 __B, __m128 __C) { - return (__m128)__builtin_ia32_vfmaddsubps(__A, __B, __C); + return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); } static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maddsub_pd(__m128d __A, __m128d __B, __m128d __C) { - return (__m128d)__builtin_ia32_vfmaddsubpd(__A, __B, __C); + return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C); } static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_msubadd_ps(__m128 __A, __m128 __B, __m128 __C) { - return (__m128)__builtin_ia32_vfmsubaddps(__A, __B, __C); + return (__m128)__builtin_ia32_vfmsubaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); } static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_msubadd_pd(__m128d __A, __m128d __B, __m128d __C) { - return (__m128d)__builtin_ia32_vfmsubaddpd(__A, __B, __C); + return (__m128d)__builtin_ia32_vfmsubaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C); } static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_macc_ps(__m256 __A, __m256 __B, __m256 __C) { - return (__m256)__builtin_ia32_vfmaddps256(__A, __B, __C); + return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C); } static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_macc_pd(__m256d __A, __m256d __B, __m256d __C) { - return (__m256d)__builtin_ia32_vfmaddpd256(__A, __B, __C); + return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C); } static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_msub_ps(__m256 __A, __m256 __B, __m256 __C) { - return (__m256)__builtin_ia32_vfmsubps256(__A, __B, __C); + return (__m256)__builtin_ia32_vfmsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C); } static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_msub_pd(__m256d __A, __m256d __B, __m256d __C) { - return 
(__m256d)__builtin_ia32_vfmsubpd256(__A, __B, __C); + return (__m256d)__builtin_ia32_vfmsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C); } static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_nmacc_ps(__m256 __A, __m256 __B, __m256 __C) { - return (__m256)__builtin_ia32_vfnmaddps256(__A, __B, __C); + return (__m256)__builtin_ia32_vfnmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C); } static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_nmacc_pd(__m256d __A, __m256d __B, __m256d __C) { - return (__m256d)__builtin_ia32_vfnmaddpd256(__A, __B, __C); + return (__m256d)__builtin_ia32_vfnmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C); } static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_nmsub_ps(__m256 __A, __m256 __B, __m256 __C) { - return (__m256)__builtin_ia32_vfnmsubps256(__A, __B, __C); + return (__m256)__builtin_ia32_vfnmsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C); } static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_nmsub_pd(__m256d __A, __m256d __B, __m256d __C) { - return (__m256d)__builtin_ia32_vfnmsubpd256(__A, __B, __C); + return (__m256d)__builtin_ia32_vfnmsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C); } static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maddsub_ps(__m256 __A, __m256 __B, __m256 __C) { - return (__m256)__builtin_ia32_vfmaddsubps256(__A, __B, __C); + return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C); } static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maddsub_pd(__m256d __A, __m256d __B, __m256d __C) { - return (__m256d)__builtin_ia32_vfmaddsubpd256(__A, __B, __C); + return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C); } static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_msubadd_ps(__m256 __A, __m256 __B, __m256 __C) { - return (__m256)__builtin_ia32_vfmsubaddps256(__A, __B, __C); + return (__m256)__builtin_ia32_vfmsubaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C); } static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_msubadd_pd(__m256d __A, __m256d __B, __m256d __C) { - return (__m256d)__builtin_ia32_vfmsubaddpd256(__A, __B, __C); + return (__m256d)__builtin_ia32_vfmsubaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C); } #undef __DEFAULT_FN_ATTRS Modified: cfe/trunk/lib/Headers/fmaintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/fmaintrin.h?rev=269632&r1=269631&r2=269632&view=diff ============================================================================== --- cfe/trunk/lib/Headers/fmaintrin.h (original) +++ cfe/trunk/lib/Headers/fmaintrin.h Mon May 16 01:38:42 2016 @@ -34,193 +34,193 @@ static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_fmadd_ps(__m128 __A, __m128 __B, __m128 __C) { - return (__m128)__builtin_ia32_vfmaddps(__A, __B, __C); + return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); } static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_fmadd_pd(__m128d __A, __m128d __B, __m128d __C) { - return (__m128d)__builtin_ia32_vfmaddpd(__A, __B, __C); + return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C); } static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_fmadd_ss(__m128 __A, __m128 __B, __m128 __C) { - return (__m128)__builtin_ia32_vfmaddss(__A, __B, __C); + return (__m128)__builtin_ia32_vfmaddss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); } static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_fmadd_sd(__m128d __A, __m128d __B, __m128d __C) { - return (__m128d)__builtin_ia32_vfmaddsd(__A, __B, __C); + return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B, (__v2df)__C); } static __inline__ __m128 __DEFAULT_FN_ATTRS 
_mm_fmsub_ps(__m128 __A, __m128 __B, __m128 __C) { - return (__m128)__builtin_ia32_vfmsubps(__A, __B, __C); + return (__m128)__builtin_ia32_vfmsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); } static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_fmsub_pd(__m128d __A, __m128d __B, __m128d __C) { - return (__m128d)__builtin_ia32_vfmsubpd(__A, __B, __C); + return (__m128d)__builtin_ia32_vfmsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C); } static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_fmsub_ss(__m128 __A, __m128 __B, __m128 __C) { - return (__m128)__builtin_ia32_vfmsubss(__A, __B, __C); + return (__m128)__builtin_ia32_vfmsubss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); } static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_fmsub_sd(__m128d __A, __m128d __B, __m128d __C) { - return (__m128d)__builtin_ia32_vfmsubsd(__A, __B, __C); + return (__m128d)__builtin_ia32_vfmsubsd((__v2df)__A, (__v2df)__B, (__v2df)__C); } static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C) { - return (__m128)__builtin_ia32_vfnmaddps(__A, __B, __C); + return (__m128)__builtin_ia32_vfnmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); } static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C) { - return (__m128d)__builtin_ia32_vfnmaddpd(__A, __B, __C); + return (__m128d)__builtin_ia32_vfnmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C); } static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_fnmadd_ss(__m128 __A, __m128 __B, __m128 __C) { - return (__m128)__builtin_ia32_vfnmaddss(__A, __B, __C); + return (__m128)__builtin_ia32_vfnmaddss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); } static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_fnmadd_sd(__m128d __A, __m128d __B, __m128d __C) { - return (__m128d)__builtin_ia32_vfnmaddsd(__A, __B, __C); + return (__m128d)__builtin_ia32_vfnmaddsd((__v2df)__A, (__v2df)__B, (__v2df)__C); } static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C) { - return (__m128)__builtin_ia32_vfnmsubps(__A, __B, __C); + return (__m128)__builtin_ia32_vfnmsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); } static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C) { - return (__m128d)__builtin_ia32_vfnmsubpd(__A, __B, __C); + return (__m128d)__builtin_ia32_vfnmsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C); } static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_fnmsub_ss(__m128 __A, __m128 __B, __m128 __C) { - return (__m128)__builtin_ia32_vfnmsubss(__A, __B, __C); + return (__m128)__builtin_ia32_vfnmsubss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); } static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_fnmsub_sd(__m128d __A, __m128d __B, __m128d __C) { - return (__m128d)__builtin_ia32_vfnmsubsd(__A, __B, __C); + return (__m128d)__builtin_ia32_vfnmsubsd((__v2df)__A, (__v2df)__B, (__v2df)__C); } static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C) { - return (__m128)__builtin_ia32_vfmaddsubps(__A, __B, __C); + return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); } static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C) { - return (__m128d)__builtin_ia32_vfmaddsubpd(__A, __B, __C); + return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C); } static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C) { - return (__m128)__builtin_ia32_vfmsubaddps(__A, __B, __C); + return (__m128)__builtin_ia32_vfmsubaddps((__v4sf)__A, 
(__v4sf)__B, (__v4sf)__C); } static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C) { - return (__m128d)__builtin_ia32_vfmsubaddpd(__A, __B, __C); + return (__m128d)__builtin_ia32_vfmsubaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C); } static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_fmadd_ps(__m256 __A, __m256 __B, __m256 __C) { - return (__m256)__builtin_ia32_vfmaddps256(__A, __B, __C); + return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C); } static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_fmadd_pd(__m256d __A, __m256d __B, __m256d __C) { - return (__m256d)__builtin_ia32_vfmaddpd256(__A, __B, __C); + return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C); } static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_fmsub_ps(__m256 __A, __m256 __B, __m256 __C) { - return (__m256)__builtin_ia32_vfmsubps256(__A, __B, __C); + return (__m256)__builtin_ia32_vfmsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C); } static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_fmsub_pd(__m256d __A, __m256d __B, __m256d __C) { - return (__m256d)__builtin_ia32_vfmsubpd256(__A, __B, __C); + return (__m256d)__builtin_ia32_vfmsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C); } static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C) { - return (__m256)__builtin_ia32_vfnmaddps256(__A, __B, __C); + return (__m256)__builtin_ia32_vfnmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C); } static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C) { - return (__m256d)__builtin_ia32_vfnmaddpd256(__A, __B, __C); + return (__m256d)__builtin_ia32_vfnmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C); } static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C) { - return (__m256)__builtin_ia32_vfnmsubps256(__A, __B, __C); + return (__m256)__builtin_ia32_vfnmsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C); } static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C) { - return (__m256d)__builtin_ia32_vfnmsubpd256(__A, __B, __C); + return (__m256d)__builtin_ia32_vfnmsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C); } static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C) { - return (__m256)__builtin_ia32_vfmaddsubps256(__A, __B, __C); + return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C); } static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C) { - return (__m256d)__builtin_ia32_vfmaddsubpd256(__A, __B, __C); + return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C); } static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C) { - return (__m256)__builtin_ia32_vfmsubaddps256(__A, __B, __C); + return (__m256)__builtin_ia32_vfmsubaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C); } static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C) { - return (__m256d)__builtin_ia32_vfmsubaddpd256(__A, __B, __C); + return (__m256d)__builtin_ia32_vfmsubaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C); } #undef __DEFAULT_FN_ATTRS Modified: cfe/trunk/lib/Headers/mmintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/mmintrin.h?rev=269632&r1=269631&r2=269632&view=diff ============================================================================== 
--- cfe/trunk/lib/Headers/mmintrin.h (original) +++ cfe/trunk/lib/Headers/mmintrin.h Mon May 16 01:38:42 2016 @@ -26,6 +26,7 @@ typedef long long __m64 __attribute__((__vector_size__(8))); +typedef long long __v1di __attribute__((__vector_size__(8))); typedef int __v2si __attribute__((__vector_size__(8))); typedef short __v4hi __attribute__((__vector_size__(8))); typedef char __v8qi __attribute__((__vector_size__(8))); @@ -246,13 +247,13 @@ _mm_slli_pi32(__m64 __m, int __count) static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sll_si64(__m64 __m, __m64 __count) { - return (__m64)__builtin_ia32_psllq(__m, __count); + return (__m64)__builtin_ia32_psllq((__v1di)__m, __count); } static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_slli_si64(__m64 __m, int __count) { - return (__m64)__builtin_ia32_psllqi(__m, __count); + return (__m64)__builtin_ia32_psllqi((__v1di)__m, __count); } static __inline__ __m64 __DEFAULT_FN_ATTRS @@ -306,37 +307,37 @@ _mm_srli_pi32(__m64 __m, int __count) static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_srl_si64(__m64 __m, __m64 __count) { - return (__m64)__builtin_ia32_psrlq(__m, __count); + return (__m64)__builtin_ia32_psrlq((__v1di)__m, __count); } static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_srli_si64(__m64 __m, int __count) { - return (__m64)__builtin_ia32_psrlqi(__m, __count); + return (__m64)__builtin_ia32_psrlqi((__v1di)__m, __count); } static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_and_si64(__m64 __m1, __m64 __m2) { - return __builtin_ia32_pand(__m1, __m2); + return __builtin_ia32_pand((__v1di)__m1, (__v1di)__m2); } static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_andnot_si64(__m64 __m1, __m64 __m2) { - return __builtin_ia32_pandn(__m1, __m2); + return __builtin_ia32_pandn((__v1di)__m1, (__v1di)__m2); } static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_or_si64(__m64 __m1, __m64 __m2) { - return __builtin_ia32_por(__m1, __m2); + return __builtin_ia32_por((__v1di)__m1, (__v1di)__m2); } static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_xor_si64(__m64 __m1, __m64 __m2) { - return __builtin_ia32_pxor(__m1, __m2); + return __builtin_ia32_pxor((__v1di)__m1, (__v1di)__m2); } static __inline__ __m64 __DEFAULT_FN_ATTRS Modified: cfe/trunk/lib/Headers/pmmintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/pmmintrin.h?rev=269632&r1=269631&r2=269632&view=diff ============================================================================== --- cfe/trunk/lib/Headers/pmmintrin.h (original) +++ cfe/trunk/lib/Headers/pmmintrin.h Mon May 16 01:38:42 2016 @@ -64,7 +64,7 @@ _mm_lddqu_si128(__m128i const *__p) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_addsub_ps(__m128 __a, __m128 __b) { - return __builtin_ia32_addsubps(__a, __b); + return __builtin_ia32_addsubps((__v4sf)__a, (__v4sf)__b); } /// \brief Horizontally adds the adjacent pairs of values contained in two @@ -87,7 +87,7 @@ _mm_addsub_ps(__m128 __a, __m128 __b) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_hadd_ps(__m128 __a, __m128 __b) { - return __builtin_ia32_haddps(__a, __b); + return __builtin_ia32_haddps((__v4sf)__a, (__v4sf)__b); } /// \brief Horizontally subtracts the adjacent pairs of values contained in two @@ -110,7 +110,7 @@ _mm_hadd_ps(__m128 __a, __m128 __b) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_hsub_ps(__m128 __a, __m128 __b) { - return __builtin_ia32_hsubps(__a, __b); + return __builtin_ia32_hsubps((__v4sf)__a, (__v4sf)__b); } /// \brief Moves and duplicates high-order (odd-indexed) values from a 128-bit @@ -132,7 +132,7 @@ _mm_hsub_ps(__m128 __a, __m128 __b) static __inline__ __m128 
__DEFAULT_FN_ATTRS _mm_movehdup_ps(__m128 __a) { - return __builtin_shufflevector(__a, __a, 1, 1, 3, 3); + return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 1, 1, 3, 3); } /// \brief Duplicates low-order (even-indexed) values from a 128-bit @@ -154,7 +154,7 @@ _mm_movehdup_ps(__m128 __a) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_moveldup_ps(__m128 __a) { - return __builtin_shufflevector(__a, __a, 0, 0, 2, 2); + return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 0, 2, 2); } /// \brief Adds the even-indexed values and subtracts the odd-indexed values of @@ -173,7 +173,7 @@ _mm_moveldup_ps(__m128 __a) static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_addsub_pd(__m128d __a, __m128d __b) { - return __builtin_ia32_addsubpd(__a, __b); + return __builtin_ia32_addsubpd((__v2df)__a, (__v2df)__b); } /// \brief Horizontally adds the pairs of values contained in two 128-bit @@ -196,7 +196,7 @@ _mm_addsub_pd(__m128d __a, __m128d __b) static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_hadd_pd(__m128d __a, __m128d __b) { - return __builtin_ia32_haddpd(__a, __b); + return __builtin_ia32_haddpd((__v2df)__a, (__v2df)__b); } /// \brief Horizontally subtracts the pairs of values contained in two 128-bit @@ -219,7 +219,7 @@ _mm_hadd_pd(__m128d __a, __m128d __b) static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_hsub_pd(__m128d __a, __m128d __b) { - return __builtin_ia32_hsubpd(__a, __b); + return __builtin_ia32_hsubpd((__v2df)__a, (__v2df)__b); } /// \brief Moves and duplicates one double-precision value to double-precision @@ -255,7 +255,7 @@ _mm_hsub_pd(__m128d __a, __m128d __b) static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_movedup_pd(__m128d __a) { - return __builtin_shufflevector(__a, __a, 0, 0); + return __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 0); } #define _MM_DENORMALS_ZERO_ON (0x0040) Modified: cfe/trunk/lib/Headers/xmmintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/xmmintrin.h?rev=269632&r1=269631&r2=269632&view=diff ============================================================================== --- cfe/trunk/lib/Headers/xmmintrin.h (original) +++ cfe/trunk/lib/Headers/xmmintrin.h Mon May 16 01:38:42 2016 @@ -77,7 +77,7 @@ _mm_add_ss(__m128 __a, __m128 __b) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_add_ps(__m128 __a, __m128 __b) { - return __a + __b; + return (__m128)((__v4sf)__a + (__v4sf)__b); } /// \brief Subtracts the 32-bit float value in the low-order bits of the second @@ -120,7 +120,7 @@ _mm_sub_ss(__m128 __a, __m128 __b) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_sub_ps(__m128 __a, __m128 __b) { - return __a - __b; + return (__m128)((__v4sf)__a - (__v4sf)__b); } /// \brief Multiplies two 32-bit float values in the low-order bits of the @@ -162,7 +162,7 @@ _mm_mul_ss(__m128 __a, __m128 __b) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mul_ps(__m128 __a, __m128 __b) { - return __a * __b; + return (__m128)((__v4sf)__a * (__v4sf)__b); } /// \brief Divides the value in the low-order 32 bits of the first operand by @@ -203,7 +203,7 @@ _mm_div_ss(__m128 __a, __m128 __b) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_div_ps(__m128 __a, __m128 __b) { - return __a / __b; + return (__m128)((__v4sf)__a / (__v4sf)__b); } /// \brief Calculates the square root of the value stored in the low-order bits @@ -221,7 +221,7 @@ _mm_div_ps(__m128 __a, __m128 __b) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_sqrt_ss(__m128 __a) { - __m128 __c = __builtin_ia32_sqrtss(__a); + __m128 __c = __builtin_ia32_sqrtss((__v4sf)__a); return (__m128) { 
__c[0], __a[1], __a[2], __a[3] }; } @@ -239,7 +239,7 @@ _mm_sqrt_ss(__m128 __a) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_sqrt_ps(__m128 __a) { - return __builtin_ia32_sqrtps(__a); + return __builtin_ia32_sqrtps((__v4sf)__a); } /// \brief Calculates the approximate reciprocal of the value stored in the @@ -257,7 +257,7 @@ _mm_sqrt_ps(__m128 __a) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_rcp_ss(__m128 __a) { - __m128 __c = __builtin_ia32_rcpss(__a); + __m128 __c = __builtin_ia32_rcpss((__v4sf)__a); return (__m128) { __c[0], __a[1], __a[2], __a[3] }; } @@ -275,7 +275,7 @@ _mm_rcp_ss(__m128 __a) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_rcp_ps(__m128 __a) { - return __builtin_ia32_rcpps(__a); + return __builtin_ia32_rcpps((__v4sf)__a); } /// \brief Calculates the approximate reciprocal of the square root of the value @@ -294,7 +294,7 @@ _mm_rcp_ps(__m128 __a) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_rsqrt_ss(__m128 __a) { - __m128 __c = __builtin_ia32_rsqrtss(__a); + __m128 __c = __builtin_ia32_rsqrtss((__v4sf)__a); return (__m128) { __c[0], __a[1], __a[2], __a[3] }; } @@ -312,7 +312,7 @@ _mm_rsqrt_ss(__m128 __a) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_rsqrt_ps(__m128 __a) { - return __builtin_ia32_rsqrtps(__a); + return __builtin_ia32_rsqrtps((__v4sf)__a); } /// \brief Compares two 32-bit float values in the low-order bits of both @@ -335,7 +335,7 @@ _mm_rsqrt_ps(__m128 __a) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_min_ss(__m128 __a, __m128 __b) { - return __builtin_ia32_minss(__a, __b); + return __builtin_ia32_minss((__v4sf)__a, (__v4sf)__b); } /// \brief Compares two 128-bit vectors of [4 x float] and returns the @@ -354,7 +354,7 @@ _mm_min_ss(__m128 __a, __m128 __b) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_min_ps(__m128 __a, __m128 __b) { - return __builtin_ia32_minps(__a, __b); + return __builtin_ia32_minps((__v4sf)__a, (__v4sf)__b); } /// \brief Compares two 32-bit float values in the low-order bits of both @@ -377,7 +377,7 @@ _mm_min_ps(__m128 __a, __m128 __b) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_max_ss(__m128 __a, __m128 __b) { - return __builtin_ia32_maxss(__a, __b); + return __builtin_ia32_maxss((__v4sf)__a, (__v4sf)__b); } /// \brief Compares two 128-bit vectors of [4 x float] and returns the greater @@ -396,7 +396,7 @@ _mm_max_ss(__m128 __a, __m128 __b) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_max_ps(__m128 __a, __m128 __b) { - return __builtin_ia32_maxps(__a, __b); + return __builtin_ia32_maxps((__v4sf)__a, (__v4sf)__b); } /// \brief Performs a bitwise AND of two 128-bit vectors of [4 x float]. 
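The pattern in these xmmintrin.h hunks is uniform: each operand is cast to an explicitly element-typed vector (__v4sf, four floats) before the builtin call or vector operator, and the result is cast back to the public __m128 type, so the header no longer relies on __m128 itself being defined with float elements. A minimal sketch of the idea outside the header (my_v4sf and my_add_ps are illustrative names, not part of any header) is:

  #include <xmmintrin.h>

  /* Illustrative only: an element-typed alias and a wrapper that mirrors
     the shape of the new _mm_add_ps implementation.  Lane-wise addition
     happens on the float-element vector type; the opaque public type is
     used only at the interface boundary. */
  typedef float my_v4sf __attribute__((__vector_size__(16)));

  static inline __m128 my_add_ps(__m128 a, __m128 b)
  {
    return (__m128)((my_v4sf)a + (my_v4sf)b);
  }
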
@@ -495,7 +495,7 @@ _mm_xor_ps(__m128 __a, __m128 __b) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpeq_ss(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpeqss(__a, __b); + return (__m128)__builtin_ia32_cmpeqss((__v4sf)__a, (__v4sf)__b); } /// \brief Compares each of the corresponding 32-bit float values of the @@ -513,7 +513,7 @@ _mm_cmpeq_ss(__m128 __a, __m128 __b) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpeq_ps(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpeqps(__a, __b); + return (__m128)__builtin_ia32_cmpeqps((__v4sf)__a, (__v4sf)__b); } /// \brief Compares two 32-bit float values in the low-order bits of both @@ -536,7 +536,7 @@ _mm_cmpeq_ps(__m128 __a, __m128 __b) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmplt_ss(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpltss(__a, __b); + return (__m128)__builtin_ia32_cmpltss((__v4sf)__a, (__v4sf)__b); } /// \brief Compares each of the corresponding 32-bit float values of the @@ -555,7 +555,7 @@ _mm_cmplt_ss(__m128 __a, __m128 __b) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmplt_ps(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpltps(__a, __b); + return (__m128)__builtin_ia32_cmpltps((__v4sf)__a, (__v4sf)__b); } /// \brief Compares two 32-bit float values in the low-order bits of both @@ -579,7 +579,7 @@ _mm_cmplt_ps(__m128 __a, __m128 __b) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmple_ss(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpless(__a, __b); + return (__m128)__builtin_ia32_cmpless((__v4sf)__a, (__v4sf)__b); } /// \brief Compares each of the corresponding 32-bit float values of the @@ -598,7 +598,7 @@ _mm_cmple_ss(__m128 __a, __m128 __b) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmple_ps(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpleps(__a, __b); + return (__m128)__builtin_ia32_cmpleps((__v4sf)__a, (__v4sf)__b); } /// \brief Compares two 32-bit float values in the low-order bits of both @@ -621,8 +621,8 @@ _mm_cmple_ps(__m128 __a, __m128 __b) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpgt_ss(__m128 __a, __m128 __b) { - return (__m128)__builtin_shufflevector(__a, - __builtin_ia32_cmpltss(__b, __a), + return (__m128)__builtin_shufflevector((__v4sf)__a, + (__v4sf)__builtin_ia32_cmpltss((__v4sf)__b, (__v4sf)__a), 4, 1, 2, 3); } @@ -642,7 +642,7 @@ _mm_cmpgt_ss(__m128 __a, __m128 __b) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpgt_ps(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpltps(__b, __a); + return (__m128)__builtin_ia32_cmpltps((__v4sf)__b, (__v4sf)__a); } /// \brief Compares two 32-bit float values in the low-order bits of both @@ -666,8 +666,8 @@ _mm_cmpgt_ps(__m128 __a, __m128 __b) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpge_ss(__m128 __a, __m128 __b) { - return (__m128)__builtin_shufflevector(__a, - __builtin_ia32_cmpless(__b, __a), + return (__m128)__builtin_shufflevector((__v4sf)__a, + (__v4sf)__builtin_ia32_cmpless((__v4sf)__b, (__v4sf)__a), 4, 1, 2, 3); } @@ -687,7 +687,7 @@ _mm_cmpge_ss(__m128 __a, __m128 __b) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpge_ps(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpleps(__b, __a); + return (__m128)__builtin_ia32_cmpleps((__v4sf)__b, (__v4sf)__a); } /// \brief Compares two 32-bit float values in the low-order bits of both @@ -709,7 +709,7 @@ _mm_cmpge_ps(__m128 __a, __m128 __b) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpneq_ss(__m128 __a, __m128 __b) { - return 
(__m128)__builtin_ia32_cmpneqss(__a, __b); + return (__m128)__builtin_ia32_cmpneqss((__v4sf)__a, (__v4sf)__b); } /// \brief Compares each of the corresponding 32-bit float values of the @@ -727,7 +727,7 @@ _mm_cmpneq_ss(__m128 __a, __m128 __b) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpneq_ps(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpneqps(__a, __b); + return (__m128)__builtin_ia32_cmpneqps((__v4sf)__a, (__v4sf)__b); } /// \brief Compares two 32-bit float values in the low-order bits of both @@ -750,7 +750,7 @@ _mm_cmpneq_ps(__m128 __a, __m128 __b) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpnlt_ss(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpnltss(__a, __b); + return (__m128)__builtin_ia32_cmpnltss((__v4sf)__a, (__v4sf)__b); } /// \brief Compares each of the corresponding 32-bit float values of the @@ -769,7 +769,7 @@ _mm_cmpnlt_ss(__m128 __a, __m128 __b) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpnlt_ps(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpnltps(__a, __b); + return (__m128)__builtin_ia32_cmpnltps((__v4sf)__a, (__v4sf)__b); } /// \brief Compares two 32-bit float values in the low-order bits of both @@ -793,7 +793,7 @@ _mm_cmpnlt_ps(__m128 __a, __m128 __b) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpnle_ss(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpnless(__a, __b); + return (__m128)__builtin_ia32_cmpnless((__v4sf)__a, (__v4sf)__b); } /// \brief Compares each of the corresponding 32-bit float values of the @@ -812,7 +812,7 @@ _mm_cmpnle_ss(__m128 __a, __m128 __b) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpnle_ps(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpnleps(__a, __b); + return (__m128)__builtin_ia32_cmpnleps((__v4sf)__a, (__v4sf)__b); } /// \brief Compares two 32-bit float values in the low-order bits of both @@ -836,8 +836,8 @@ _mm_cmpnle_ps(__m128 __a, __m128 __b) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpngt_ss(__m128 __a, __m128 __b) { - return (__m128)__builtin_shufflevector(__a, - __builtin_ia32_cmpnltss(__b, __a), + return (__m128)__builtin_shufflevector((__v4sf)__a, + (__v4sf)__builtin_ia32_cmpnltss((__v4sf)__b, (__v4sf)__a), 4, 1, 2, 3); } @@ -857,7 +857,7 @@ _mm_cmpngt_ss(__m128 __a, __m128 __b) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpngt_ps(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpnltps(__b, __a); + return (__m128)__builtin_ia32_cmpnltps((__v4sf)__b, (__v4sf)__a); } /// \brief Compares two 32-bit float values in the low-order bits of both @@ -881,8 +881,8 @@ _mm_cmpngt_ps(__m128 __a, __m128 __b) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpnge_ss(__m128 __a, __m128 __b) { - return (__m128)__builtin_shufflevector(__a, - __builtin_ia32_cmpnless(__b, __a), + return (__m128)__builtin_shufflevector((__v4sf)__a, + (__v4sf)__builtin_ia32_cmpnless((__v4sf)__b, (__v4sf)__a), 4, 1, 2, 3); } @@ -902,7 +902,7 @@ _mm_cmpnge_ss(__m128 __a, __m128 __b) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpnge_ps(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpnleps(__b, __a); + return (__m128)__builtin_ia32_cmpnleps((__v4sf)__b, (__v4sf)__a); } /// \brief Compares two 32-bit float values in the low-order bits of both @@ -926,7 +926,7 @@ _mm_cmpnge_ps(__m128 __a, __m128 __b) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpord_ss(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpordss(__a, __b); + return (__m128)__builtin_ia32_cmpordss((__v4sf)__a, (__v4sf)__b); } /// \brief Compares each of 
the corresponding 32-bit float values of the @@ -945,7 +945,7 @@ _mm_cmpord_ss(__m128 __a, __m128 __b) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpord_ps(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpordps(__a, __b); + return (__m128)__builtin_ia32_cmpordps((__v4sf)__a, (__v4sf)__b); } /// \brief Compares two 32-bit float values in the low-order bits of both @@ -969,7 +969,7 @@ _mm_cmpord_ps(__m128 __a, __m128 __b) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpunord_ss(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpunordss(__a, __b); + return (__m128)__builtin_ia32_cmpunordss((__v4sf)__a, (__v4sf)__b); } /// \brief Compares each of the corresponding 32-bit float values of the @@ -988,7 +988,7 @@ _mm_cmpunord_ss(__m128 __a, __m128 __b) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpunord_ps(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpunordps(__a, __b); + return (__m128)__builtin_ia32_cmpunordps((__v4sf)__a, (__v4sf)__b); } /// \brief Compares two 32-bit float values in the low-order bits of both @@ -1008,7 +1008,7 @@ _mm_cmpunord_ps(__m128 __a, __m128 __b) static __inline__ int __DEFAULT_FN_ATTRS _mm_comieq_ss(__m128 __a, __m128 __b) { - return __builtin_ia32_comieq(__a, __b); + return __builtin_ia32_comieq((__v4sf)__a, (__v4sf)__b); } /// \brief Compares two 32-bit float values in the low-order bits of both @@ -1029,7 +1029,7 @@ _mm_comieq_ss(__m128 __a, __m128 __b) static __inline__ int __DEFAULT_FN_ATTRS _mm_comilt_ss(__m128 __a, __m128 __b) { - return __builtin_ia32_comilt(__a, __b); + return __builtin_ia32_comilt((__v4sf)__a, (__v4sf)__b); } /// \brief Compares two 32-bit float values in the low-order bits of both @@ -1050,7 +1050,7 @@ _mm_comilt_ss(__m128 __a, __m128 __b) static __inline__ int __DEFAULT_FN_ATTRS _mm_comile_ss(__m128 __a, __m128 __b) { - return __builtin_ia32_comile(__a, __b); + return __builtin_ia32_comile((__v4sf)__a, (__v4sf)__b); } /// \brief Compares two 32-bit float values in the low-order bits of both @@ -1071,7 +1071,7 @@ _mm_comile_ss(__m128 __a, __m128 __b) static __inline__ int __DEFAULT_FN_ATTRS _mm_comigt_ss(__m128 __a, __m128 __b) { - return __builtin_ia32_comigt(__a, __b); + return __builtin_ia32_comigt((__v4sf)__a, (__v4sf)__b); } /// \brief Compares two 32-bit float values in the low-order bits of both @@ -1092,7 +1092,7 @@ _mm_comigt_ss(__m128 __a, __m128 __b) static __inline__ int __DEFAULT_FN_ATTRS _mm_comige_ss(__m128 __a, __m128 __b) { - return __builtin_ia32_comige(__a, __b); + return __builtin_ia32_comige((__v4sf)__a, (__v4sf)__b); } /// \brief Compares two 32-bit float values in the low-order bits of both @@ -1113,7 +1113,7 @@ _mm_comige_ss(__m128 __a, __m128 __b) static __inline__ int __DEFAULT_FN_ATTRS _mm_comineq_ss(__m128 __a, __m128 __b) { - return __builtin_ia32_comineq(__a, __b); + return __builtin_ia32_comineq((__v4sf)__a, (__v4sf)__b); } /// \brief Performs an unordered comparison of two 32-bit float values using @@ -1134,7 +1134,7 @@ _mm_comineq_ss(__m128 __a, __m128 __b) static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomieq_ss(__m128 __a, __m128 __b) { - return __builtin_ia32_ucomieq(__a, __b); + return __builtin_ia32_ucomieq((__v4sf)__a, (__v4sf)__b); } /// \brief Performs an unordered comparison of two 32-bit float values using @@ -1155,7 +1155,7 @@ _mm_ucomieq_ss(__m128 __a, __m128 __b) static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomilt_ss(__m128 __a, __m128 __b) { - return __builtin_ia32_ucomilt(__a, __b); + return __builtin_ia32_ucomilt((__v4sf)__a, (__v4sf)__b); } 
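The COMI/UCOMI builtins above return a plain int rather than a vector, so only their operands gain the (__v4sf) casts; the callable interface is unchanged. A rough usage sketch (values chosen arbitrarily) that should behave identically before and after this patch:

  #include <stdio.h>
  #include <xmmintrin.h>

  int main(void)
  {
    __m128 a = _mm_set_ss(1.0f);   /* low element = 1.0f */
    __m128 b = _mm_set_ss(2.0f);   /* low element = 2.0f */

    /* Ordered scalar compare of the low elements: 1.0f < 2.0f -> 1. */
    printf("%d\n", _mm_comilt_ss(a, b));

    /* Unordered variant differs only in how NaN operands are reported. */
    printf("%d\n", _mm_ucomilt_ss(a, b));
    return 0;
  }
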
/// \brief Performs an unordered comparison of two 32-bit float values using @@ -1177,7 +1177,7 @@ _mm_ucomilt_ss(__m128 __a, __m128 __b) static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomile_ss(__m128 __a, __m128 __b) { - return __builtin_ia32_ucomile(__a, __b); + return __builtin_ia32_ucomile((__v4sf)__a, (__v4sf)__b); } /// \brief Performs an unordered comparison of two 32-bit float values using @@ -1199,7 +1199,7 @@ _mm_ucomile_ss(__m128 __a, __m128 __b) static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomigt_ss(__m128 __a, __m128 __b) { - return __builtin_ia32_ucomigt(__a, __b); + return __builtin_ia32_ucomigt((__v4sf)__a, (__v4sf)__b); } /// \brief Performs an unordered comparison of two 32-bit float values using @@ -1221,7 +1221,7 @@ _mm_ucomigt_ss(__m128 __a, __m128 __b) static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomige_ss(__m128 __a, __m128 __b) { - return __builtin_ia32_ucomige(__a, __b); + return __builtin_ia32_ucomige((__v4sf)__a, (__v4sf)__b); } /// \brief Performs an unordered comparison of two 32-bit float values using @@ -1242,7 +1242,7 @@ _mm_ucomige_ss(__m128 __a, __m128 __b) static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomineq_ss(__m128 __a, __m128 __b) { - return __builtin_ia32_ucomineq(__a, __b); + return __builtin_ia32_ucomineq((__v4sf)__a, (__v4sf)__b); } /// \brief Converts a float value contained in the lower 32 bits of a vector of @@ -1259,7 +1259,7 @@ _mm_ucomineq_ss(__m128 __a, __m128 __b) static __inline__ int __DEFAULT_FN_ATTRS _mm_cvtss_si32(__m128 __a) { - return __builtin_ia32_cvtss2si(__a); + return __builtin_ia32_cvtss2si((__v4sf)__a); } /// \brief Converts a float value contained in the lower 32 bits of a vector of @@ -1295,7 +1295,7 @@ _mm_cvt_ss2si(__m128 __a) static __inline__ long long __DEFAULT_FN_ATTRS _mm_cvtss_si64(__m128 __a) { - return __builtin_ia32_cvtss2si64(__a); + return __builtin_ia32_cvtss2si64((__v4sf)__a); } #endif @@ -1313,7 +1313,7 @@ _mm_cvtss_si64(__m128 __a) static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cvtps_pi32(__m128 __a) { - return (__m64)__builtin_ia32_cvtps2pi(__a); + return (__m64)__builtin_ia32_cvtps2pi((__v4sf)__a); } /// \brief Converts two low-order float values in a 128-bit vector of @@ -1400,7 +1400,7 @@ _mm_cvttss_si64(__m128 __a) static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cvttps_pi32(__m128 __a) { - return (__m64)__builtin_ia32_cvttps2pi(__a); + return (__m64)__builtin_ia32_cvttps2pi((__v4sf)__a); } static __inline__ __m64 __DEFAULT_FN_ATTRS @@ -1436,7 +1436,7 @@ _mm_cvtsi64_ss(__m128 __a, long long __b static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtpi32_ps(__m128 __a, __m64 __b) { - return __builtin_ia32_cvtpi2ps(__a, (__v2si)__b); + return __builtin_ia32_cvtpi2ps((__v4sf)__a, (__v2si)__b); } static __inline__ __m128 __DEFAULT_FN_ATTRS @@ -1516,7 +1516,7 @@ static __inline__ __m128 __DEFAULT_FN_AT _mm_loadr_ps(const float *__p) { __m128 __a = _mm_load_ps(__p); - return __builtin_shufflevector(__a, __a, 3, 2, 1, 0); + return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 3, 2, 1, 0); } static __inline__ __m128 __DEFAULT_FN_ATTRS @@ -1565,13 +1565,13 @@ _mm_setzero_ps(void) static __inline__ void __DEFAULT_FN_ATTRS _mm_storeh_pi(__m64 *__p, __m128 __a) { - __builtin_ia32_storehps((__v2si *)__p, __a); + __builtin_ia32_storehps((__v2si *)__p, (__v4sf)__a); } static __inline__ void __DEFAULT_FN_ATTRS _mm_storel_pi(__m64 *__p, __m128 __a) { - __builtin_ia32_storelps((__v2si *)__p, __a); + __builtin_ia32_storelps((__v2si *)__p, (__v4sf)__a); } static __inline__ void __DEFAULT_FN_ATTRS @@ -1586,13 +1586,13 @@ 
_mm_store_ss(float *__p, __m128 __a) static __inline__ void __DEFAULT_FN_ATTRS _mm_storeu_ps(float *__p, __m128 __a) { - __builtin_ia32_storeups(__p, __a); + __builtin_ia32_storeups(__p, (__v4sf)__a); } static __inline__ void __DEFAULT_FN_ATTRS _mm_store1_ps(float *__p, __m128 __a) { - __a = __builtin_shufflevector(__a, __a, 0, 0, 0, 0); + __a = __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 0, 0, 0); _mm_storeu_ps(__p, __a); } @@ -1611,7 +1611,7 @@ _mm_store_ps(float *__p, __m128 __a) static __inline__ void __DEFAULT_FN_ATTRS _mm_storer_ps(float *__p, __m128 __a) { - __a = __builtin_shufflevector(__a, __a, 3, 2, 1, 0); + __a = __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 3, 2, 1, 0); _mm_store_ps(__p, __a); } @@ -1636,7 +1636,7 @@ _mm_stream_pi(__m64 *__p, __m64 __a) static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_ps(float *__p, __m128 __a) { - __builtin_ia32_movntps(__p, __a); + __builtin_ia32_movntps(__p, (__v4sf)__a); } static __inline__ void __DEFAULT_FN_ATTRS @@ -1744,31 +1744,31 @@ _mm_setcsr(unsigned int __i) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_unpackhi_ps(__m128 __a, __m128 __b) { - return __builtin_shufflevector(__a, __b, 2, 6, 3, 7); + return __builtin_shufflevector((__v4sf)__a, (__v4sf)__b, 2, 6, 3, 7); } static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_unpacklo_ps(__m128 __a, __m128 __b) { - return __builtin_shufflevector(__a, __b, 0, 4, 1, 5); + return __builtin_shufflevector((__v4sf)__a, (__v4sf)__b, 0, 4, 1, 5); } static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_move_ss(__m128 __a, __m128 __b) { - return __builtin_shufflevector(__a, __b, 4, 1, 2, 3); + return __builtin_shufflevector((__v4sf)__a, (__v4sf)__b, 4, 1, 2, 3); } static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_movehl_ps(__m128 __a, __m128 __b) { - return __builtin_shufflevector(__a, __b, 6, 7, 2, 3); + return __builtin_shufflevector((__v4sf)__a, (__v4sf)__b, 6, 7, 2, 3); } static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_movelh_ps(__m128 __a, __m128 __b) { - return __builtin_shufflevector(__a, __b, 0, 1, 4, 5); + return __builtin_shufflevector((__v4sf)__a, (__v4sf)__b, 0, 1, 4, 5); } static __inline__ __m128 __DEFAULT_FN_ATTRS @@ -1867,7 +1867,7 @@ _mm_cvtps_pi8(__m128 __a) static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_ps(__m128 __a) { - return __builtin_ia32_movmskps(__a); + return __builtin_ia32_movmskps((__v4sf)__a); } Modified: cfe/trunk/lib/Headers/xopintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/xopintrin.h?rev=269632&r1=269631&r2=269632&view=diff ============================================================================== --- cfe/trunk/lib/Headers/xopintrin.h (original) +++ cfe/trunk/lib/Headers/xopintrin.h Mon May 16 01:38:42 2016 @@ -198,13 +198,13 @@ _mm_hsubq_epi32(__m128i __A) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmov_si128(__m128i __A, __m128i __B, __m128i __C) { - return (__m128i)__builtin_ia32_vpcmov(__A, __B, __C); + return (__m128i)__builtin_ia32_vpcmov((__v2di)__A, (__v2di)__B, (__v2di)__C); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cmov_si256(__m256i __A, __m256i __B, __m256i __C) { - return (__m256i)__builtin_ia32_vpcmov_256(__A, __B, __C); + return (__m256i)__builtin_ia32_vpcmov_256((__v4di)__A, (__v4di)__B, (__v4di)__C); } static __inline__ __m128i __DEFAULT_FN_ATTRS _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
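For the XOP conditional move in the xopintrin.h hunk, _mm_cmov_si128 selects bits from __A where the corresponding bit of __C is set and from __B where it is clear, roughly (__A & __C) | (__B & ~__C); the change only casts the operands to __v2di/__v4di before the builtin call. A hedged reference sketch of that bit-select semantics using only SSE2 intrinsics (not how the header implements it) is:

  #include <emmintrin.h>

  /* Reference model for the bitwise select performed by _mm_cmov_si128:
     take bits of a where the mask c is 1 and bits of b where c is 0.
     Note _mm_andnot_si128(c, b) computes (~c) & b. */
  static inline __m128i cmov_reference(__m128i a, __m128i b, __m128i c)
  {
    return _mm_or_si128(_mm_and_si128(a, c), _mm_andnot_si128(c, b));
  }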