vbpermq produces its output in bits 48..63 of the target vector reg, so the output cannot be lane swapped. Bootstrapped and regression tested powerpc64le-linux. OK to apply mainline, and backport to the branches?
gcc/ PR target/84033 * config/rs6000/rs6000-p8swap.c (rtx_is_swappable_p): Exclude UNSPEC_VBPERMQ. Sort other unspecs. gcc/testsuite/ PR target/84033 * gcc.target/powerpc/swaps-p8-46.c: New. diff --git a/gcc/config/rs6000/rs6000-p8swap.c b/gcc/config/rs6000/rs6000-p8swap.c index 1f95290..ffcbba9 100644 --- a/gcc/config/rs6000/rs6000-p8swap.c +++ b/gcc/config/rs6000/rs6000-p8swap.c @@ -741,6 +741,7 @@ rtx_is_swappable_p (rtx op, unsigned int *special) { default: break; + case UNSPEC_VBPERMQ: case UNSPEC_VMRGH_DIRECT: case UNSPEC_VMRGL_DIRECT: case UNSPEC_VPACK_SIGN_SIGN_SAT: @@ -762,8 +763,14 @@ rtx_is_swappable_p (rtx op, unsigned int *special) case UNSPEC_VSUMSWS: case UNSPEC_VSUMSWS_DIRECT: case UNSPEC_VSX_CONCAT: + case UNSPEC_VSX_CVDPSPN: + case UNSPEC_VSX_CVSPDP: + case UNSPEC_VSX_CVSPDPN: + case UNSPEC_VSX_EXTRACT: case UNSPEC_VSX_SET: case UNSPEC_VSX_SLDWI: + case UNSPEC_VSX_VEC_INIT: + case UNSPEC_VSX_VSLO: case UNSPEC_VUNPACK_HI_SIGN: case UNSPEC_VUNPACK_HI_SIGN_DIRECT: case UNSPEC_VUNPACK_LO_SIGN: @@ -774,12 +781,6 @@ rtx_is_swappable_p (rtx op, unsigned int *special) case UNSPEC_VUPKLPX: case UNSPEC_VUPKLS_V4SF: case UNSPEC_VUPKLU_V4SF: - case UNSPEC_VSX_CVDPSPN: - case UNSPEC_VSX_CVSPDP: - case UNSPEC_VSX_CVSPDPN: - case UNSPEC_VSX_EXTRACT: - case UNSPEC_VSX_VSLO: - case UNSPEC_VSX_VEC_INIT: return 0; case UNSPEC_VSPLT_DIRECT: case UNSPEC_VSX_XXSPLTD: diff --git a/gcc/testsuite/gcc.target/powerpc/swaps-p8-46.c b/gcc/testsuite/gcc.target/powerpc/swaps-p8-46.c new file mode 100644 index 0000000..23494b6 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/swaps-p8-46.c @@ -0,0 +1,34 @@ +/* { dg-do run { target { powerpc64le-*-* } } } */ +/* { dg-require-effective-target powerpc_p8vector_ok } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */ +/* { dg-options "-mcpu=power8 -O2 " } */ + +typedef __attribute__ ((__aligned__ (8))) unsigned long long __m64; +typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__)); + +/* PR84033. Extracted from xmmintrin.h but with a pointer param to + allow swaps to happen when not inline. */ +int __attribute__ ((__noinline__)) +_mm_movemask_ps (__m128 *__A) +{ + __vector __m64 result; + static const __vector unsigned int perm_mask = + { + 0x00204060, 0x80808080, 0x80808080, 0x80808080 + }; + + result = (__vector __m64) + __builtin_vec_vbpermq ((__vector unsigned char) (*__A), + (__vector unsigned char) perm_mask); + return result[1]; +} + +int +main (void) +{ + union { unsigned int i[4]; __m128 m; } x + = { 0x80000000, 0x80000000, 0x7fffffff, 0x7fffffff }; + if (_mm_movemask_ps (&x.m) != 3) + __builtin_abort (); + return 0; +} -- Alan Modra Australia Development Lab, IBM