On Jan 25, 2018, at 4:09 AM, Alan Modra <amo...@gmail.com> wrote: > > vbpermq produces its output in bits 48..63 of the target vector reg, > so the output cannot be lane swapped. Bootstrapped and regression > tested powerpc64le-linux. OK to apply mainline, and backport to the > branches?
I can't approve, but FWIW, I agree with this patch. Bill > > gcc/ > PR target/84033 > * config/rs6000/rs6000-p8swap.c (rtx_is_swappable_p): Exclude > UNSPEC_VBPERMQ. Sort other unspecs. > gcc/testsuite/ > PR target/84033 > * gcc.target/powerpc/swaps-p8-46.c: New. > > diff --git a/gcc/config/rs6000/rs6000-p8swap.c > b/gcc/config/rs6000/rs6000-p8swap.c > index 1f95290..ffcbba9 100644 > --- a/gcc/config/rs6000/rs6000-p8swap.c > +++ b/gcc/config/rs6000/rs6000-p8swap.c > @@ -741,6 +741,7 @@ rtx_is_swappable_p (rtx op, unsigned int *special) > { > default: > break; > + case UNSPEC_VBPERMQ: > case UNSPEC_VMRGH_DIRECT: > case UNSPEC_VMRGL_DIRECT: > case UNSPEC_VPACK_SIGN_SIGN_SAT: > @@ -762,8 +763,14 @@ rtx_is_swappable_p (rtx op, unsigned int *special) > case UNSPEC_VSUMSWS: > case UNSPEC_VSUMSWS_DIRECT: > case UNSPEC_VSX_CONCAT: > + case UNSPEC_VSX_CVDPSPN: > + case UNSPEC_VSX_CVSPDP: > + case UNSPEC_VSX_CVSPDPN: > + case UNSPEC_VSX_EXTRACT: > case UNSPEC_VSX_SET: > case UNSPEC_VSX_SLDWI: > + case UNSPEC_VSX_VEC_INIT: > + case UNSPEC_VSX_VSLO: > case UNSPEC_VUNPACK_HI_SIGN: > case UNSPEC_VUNPACK_HI_SIGN_DIRECT: > case UNSPEC_VUNPACK_LO_SIGN: > @@ -774,12 +781,6 @@ rtx_is_swappable_p (rtx op, unsigned int *special) > case UNSPEC_VUPKLPX: > case UNSPEC_VUPKLS_V4SF: > case UNSPEC_VUPKLU_V4SF: > - case UNSPEC_VSX_CVDPSPN: > - case UNSPEC_VSX_CVSPDP: > - case UNSPEC_VSX_CVSPDPN: > - case UNSPEC_VSX_EXTRACT: > - case UNSPEC_VSX_VSLO: > - case UNSPEC_VSX_VEC_INIT: > return 0; > case UNSPEC_VSPLT_DIRECT: > case UNSPEC_VSX_XXSPLTD: > diff --git a/gcc/testsuite/gcc.target/powerpc/swaps-p8-46.c > b/gcc/testsuite/gcc.target/powerpc/swaps-p8-46.c > new file mode 100644 > index 0000000..23494b6 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/swaps-p8-46.c > @@ -0,0 +1,34 @@ > +/* { dg-do run { target { powerpc64le-*-* } } } */ > +/* { dg-require-effective-target powerpc_p8vector_ok } */ > +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { > "-mcpu=power8" } } */ > +/* { dg-options "-mcpu=power8 -O2 " } */ > + > +typedef __attribute__ ((__aligned__ (8))) unsigned long long __m64; > +typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__)); > + > +/* PR84033. Extracted from xmmintrin.h but with a pointer param to > + allow swaps to happen when not inline. */ > +int __attribute__ ((__noinline__)) > +_mm_movemask_ps (__m128 *__A) > +{ > + __vector __m64 result; > + static const __vector unsigned int perm_mask = > + { > + 0x00204060, 0x80808080, 0x80808080, 0x80808080 > + }; > + > + result = (__vector __m64) > + __builtin_vec_vbpermq ((__vector unsigned char) (*__A), > + (__vector unsigned char) perm_mask); > + return result[1]; > +} > + > +int > +main (void) > +{ > + union { unsigned int i[4]; __m128 m; } x > + = { 0x80000000, 0x80000000, 0x7fffffff, 0x7fffffff }; > + if (_mm_movemask_ps (&x.m) != 3) > + __builtin_abort (); > + return 0; > +} > > -- > Alan Modra > Australia Development Lab, IBM >