A patch to address this problem report was committed on 3/15/2019. Some of the new regression tests submitted with that initial patch failed on P8 big-endian and on P9 little-endian.
This new patch addresses the code generation problems that were uncovered by these failing tests. Additionally, this new patch corrects some of the expected instruction counts for certain previously existing regression tests on certain targets to adjust for changes in the generated code. This new patch has been bootstrapped and tested without regressions on powerpcle-unknown-linux (both P8 and P9) and on powerpc-linux (P7 and P8, both -m32 and -m64). Is this ok for trunk and backports? Thanks. gcc/ChangeLog: 2019-04-09 Kelvin Nilsen <kel...@gcc.gnu.org> PR target/87532 * config/rs6000/rs6000.c (rs6000_split_vec_extract_var): Use inner mode of vector rather than mode of destination for move instruction. * config/rs6000/vsx.md (*vsx_extract_<mode>_<VS_scalar>mode_var): Use QI inner mode with V16QI vector mode. gcc/testsuite/ChangeLog: 2019-04-09 Kelvin Nilsen <kel...@gcc.gnu.org> PR target/87532 * gcc.target/powerpc/fold-vec-extract-char.p8.c: Adjust expected instruction counts. * gcc.target/powerpc/fold-vec-extract-int.p8.c: Likewise. 
* gcc.target/powerpc/fold-vec-extract-short.p8.c: Likewise.: Index: gcc/config/rs6000/rs6000.c =================================================================== --- gcc/config/rs6000/rs6000.c (revision 270127) +++ gcc/config/rs6000/rs6000.c (working copy) @@ -7167,7 +7167,7 @@ rtx tmp_altivec) { machine_mode mode = GET_MODE (src); - machine_mode scalar_mode = GET_MODE (dest); + machine_mode scalar_mode = GET_MODE_INNER (GET_MODE (src)); unsigned scalar_size = GET_MODE_SIZE (scalar_mode); int byte_shift = exact_log2 (scalar_size); Index: gcc/config/rs6000/vsx.md =================================================================== --- gcc/config/rs6000/vsx.md (revision 270127) +++ gcc/config/rs6000/vsx.md (working copy) @@ -3739,9 +3739,9 @@ DONE; }) -(define_insn_and_split "*vsx_extract_<VSX_EXTRACT_I:mode>_<SDI:mode>_var" - [(set (match_operand:SDI 0 "gpc_reg_operand" "=r,r,r") - (zero_extend:SDI +(define_insn_and_split "*vsx_extract_<mode>_<VS_scalar>mode_var" + [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,r,r") + (zero_extend:<VS_scalar> (unspec:<VSX_EXTRACT_I:VS_scalar> [(match_operand:VSX_EXTRACT_I 1 "input_operand" "wK,v,m") (match_operand:DI 2 "gpc_reg_operand" "r,r,r")] @@ -3753,7 +3753,7 @@ "&& reload_completed" [(const_int 0)] { - machine_mode smode = <VSX_EXTRACT_I:MODE>mode; + machine_mode smode = <VS_scalar>mode; rs6000_split_vec_extract_var (gen_rtx_REG (smode, REGNO (operands[0])), operands[1], operands[2], operands[3], operands[4]); Index: gcc/testsuite/gcc.target/powerpc/fold-vec-extract-char.p8.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/fold-vec-extract-char.p8.c (revision 270127) +++ gcc/testsuite/gcc.target/powerpc/fold-vec-extract-char.p8.c (working copy) @@ -6,9 +6,9 @@ /* { dg-options "-mdejagnu-cpu=power8 -O2" } */ // six tests total. Targeting P8LE / P8BE. 
-// P8 LE variable offset: rldicl, subfic, sldi, mtvsrd, xxpermdi, vslo, mfvsrd, sradi, (extsb) +// P8 LE variable offset: rldicl, subfic, sldi, mtvsrd, xxpermdi, vslo, mfvsrd, sradi, rlwin, (extsb) // P8 LE constant offset: vspltb, mfvsrd, rlwinm, (extsb) -// P8 BE variable offset: sldi, mtvsrd, xxpermdi, vslo, mfvsrd, sradi, (extsb) +// P8 BE variable offset: sldi, mtvsrd, xxpermdi, vslo, mfvsrd, sradi, rlwinm, (extsb) // P8 BE constant offset: vspltb, mfvsrd, rlwinm, (extsb) /* { dg-final { scan-assembler-times {\mrldicl\M} 3 { target { le } } } } */ @@ -21,7 +21,7 @@ /* { dg-final { scan-assembler-times {\msrdi\M} 3 { target lp64 } } } */ /* { dg-final { scan-assembler-times "extsb" 2 } } */ /* { dg-final { scan-assembler-times {\mvspltb\M} 3 { target lp64 } } } */ -/* { dg-final { scan-assembler-times {\mrlwinm\M} 2 { target lp64} } } */ +/* { dg-final { scan-assembler-times {\mrlwinm\M} 4 { target lp64} } } */ /* multiple codegen variations for -m32. */ /* { dg-final { scan-assembler-times {\mrlwinm\M} 3 { target ilp32} } } */ Index: gcc/testsuite/gcc.target/powerpc/fold-vec-extract-int.p8.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/fold-vec-extract-int.p8.c (revision 270127) +++ gcc/testsuite/gcc.target/powerpc/fold-vec-extract-int.p8.c (working copy) @@ -7,14 +7,14 @@ // Targeting P8 (LE) and (BE). 6 tests total. 
// P8 LE constant: vspltw, mfvsrwz, (1:extsw/2:rldicl) -// P8 LE variables: rldicl, subfic, sldi, mtvsrd, xxpermdi, vslo, mfvsrd, sradi, (1:extsw) +// P8 LE variables: subfic, sldi, mtvsrd, xxpermdi, vslo, mfvsrd, sradi, (1:extsw/5:rldicl)) // P8 BE constant: vspltw, mfvsrwz, (1:extsw/2:rldicl) -// P8 BE variables: sldi, mtvsrd, xxpermdi, vslo, mfvsrd, sradi, (1:extsw) +// P8 BE variables: sldi, mtvsrd, xxpermdi, vslo, mfvsrd, sradi, (1:extsw/2:rldicl)) /* { dg-final { scan-assembler-times {\mvspltw\M} 3 { target lp64 } } } */ /* { dg-final { scan-assembler-times {\mmfvsrwz\M} 3 { target lp64 } } } */ -/* { dg-final { scan-assembler-times {\mrldicl\M} 5 { target { le } } } } */ -/* { dg-final { scan-assembler-times {\mrldicl\M} 2 { target { lp64 && be } } } } */ +/* { dg-final { scan-assembler-times {\mrldicl\M} 7 { target { le } } } } */ +/* { dg-final { scan-assembler-times {\mrldicl\M} 4 { target { lp64 && be } } } } */ /* { dg-final { scan-assembler-times {\msubfic\M} 3 { target { le } } } } */ /* { dg-final { scan-assembler-times {\msldi\M} 3 { target lp64 } } } */ /* { dg-final { scan-assembler-times {\mmtvsrd\M} 3 { target lp64 } } } */ Index: gcc/testsuite/gcc.target/powerpc/fold-vec-extract-short.p8.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/fold-vec-extract-short.p8.c (revision 270127) +++ gcc/testsuite/gcc.target/powerpc/fold-vec-extract-short.p8.c (working copy) @@ -6,10 +6,10 @@ /* { dg-options "-mdejagnu-cpu=power8 -O2" } */ // six tests total. Targeting P8, both LE and BE. 
-// p8 (le) variable offset: rldicl, subfic, sldi, mtvsrd, xxpermdi, vslo, mfvsrd, srdi, *extsh -// p8 (le) const offset: mtvsrd, *extsh/rlwinm -// p8 (be) var offset: sldi, mtvsrd, xxpermdi, vslo, mfvsrd, srdi, *extsh -// p8 (be) const offset: vsplth, mfvsrd, *extsh/rlwinm +// p8 (le) variable offset: rldicl, subfic, sldi, mtvsrd, xxpermdi, vslo, mfvsrd, srdi, (1:extsh/2:rlwinm) +// p8 (le) const offset: mtvsrd, (1:extsh/2:rlwinm) +// p8 (be) var offset: sldi, mtvsrd, xxpermdi, vslo, mfvsrd, srdi, (1:extsh:2:rlwinm) +// p8 (be) const offset: vsplth, mfvsrd, (1:extsh/2:rlwinm) // * - each of the above will have an extsh if the argument is signed. // * - bool and unsigned tests also have an rlwinm. @@ -24,7 +24,7 @@ /* { dg-final { scan-assembler-times "mfvsrd" 6 { target lp64 } } } */ /* { dg-final { scan-assembler-times "srdi" 3 { target lp64 } } } */ /* { dg-final { scan-assembler-times "extsh" 2 { target lp64 } } } */ -/* { dg-final { scan-assembler-times "rlwinm" 2 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "rlwinm" 4 { target lp64 } } } */ /* -m32 codegen tests. */ /* { dg-final { scan-assembler-times {\mli\M} 6 { target ilp32 } } } */