On Thu, 2015-09-03 at 23:26 +0800, Andrew Pinski wrote: > On Thu, Sep 3, 2015 at 11:20 PM, Bill Schmidt > <wschm...@linux.vnet.ibm.com> wrote: > > Hi, > > > > It was pointed out to me recently that multiplying two vector chars is > > performed using scalarization, even though we have hardware support for > > byte multiplies in vectors. This patch adds an expansion for mulv16qi3 > > to correct this. > > > > The expansion is pretty simple. We do a multiply-even and multiply-odd > > to create halfword results, and then use a permute to extract the > > low-order bytes of each result. This particular form of a permute uses > > a different set of input/output vector modes than have been used before, > > so I added the altivec_vperm_v8hiv16qi insn to represent this. (The two > > source operands are vector halfword types, while the target operand is a > > vector char type.) > > This seems like something which should be done in vector generic > rather than the back-end. I am not blocking this patch but just > suggesting an alternative way of doing this instead of a target > specific patch.
Currently vector-generic checks whether the back end implements the smul_optab for the specific vector type; if not, it scalarizes the code. I'm not sure what else it should do. Targets might implement the character multiply in several different ways (directly, using mult-even/mult-odd, using mult-hi/mult-lo), so anything other than leaving the multiply in place could be wrong for some targets. Am I misunderstanding your point? Thanks, Bill > > Thanks, > Andrew > > > > > I've added two test variants, one to test the code generation, and one > > executable test to check correctness. One other test failed with this > > change. This turned out to be because PowerPC was excluded from the > > check_effective_target_vect_char_mult target support test. I resolved > > this by adding check_effective_target_powerpc_altivec to that test. > > > > Bootstrapped and tested on powerpc64le-unknown-linux-gnu with no > > regressions. Is this ok for trunk? > > > > Thanks, > > Bill > > > > > > [gcc] > > > > 2015-09-03 Bill Schmidt <wschm...@vnet.linux.ibm.com> > > > > * config/rs6000/altivec.md (altivec_vperm_v8hiv16qi): New > > define_insn. > > (mulv16qi3): New define_expand. > > > > [gcc/testsuite] > > > > 2015-09-03 Bill Schmidt <wschm...@vnet.linux.ibm.com> > > > > * gcc.target/powerpc/vec-mult-char-1.c: New test. > > * gcc.target/powerpc/vec-mult-char-2.c: New test. > > > > > > Index: gcc/config/rs6000/altivec.md > > =================================================================== > > --- gcc/config/rs6000/altivec.md (revision 227416) > > +++ gcc/config/rs6000/altivec.md (working copy) > > @@ -1957,6 +1957,16 @@ > > "vperm %0,%1,%2,%3" > > [(set_attr "type" "vecperm")]) > > > > +(define_insn "altivec_vperm_v8hiv16qi" > > + [(set (match_operand:V16QI 0 "register_operand" "=v") > > + (unspec:V16QI [(match_operand:V8HI 1 "register_operand" "v") > > + (match_operand:V8HI 2 "register_operand" "v") > > + (match_operand:V16QI 3 "register_operand" "v")] > > + UNSPEC_VPERM))] > > + "TARGET_ALTIVEC" > > + "vperm %0,%1,%2,%3" > > + [(set_attr "type" "vecperm")]) > > + > > (define_expand "altivec_vperm_<mode>_uns" > > [(set (match_operand:VM 0 "register_operand" "=v") > > (unspec:VM [(match_operand:VM 1 "register_operand" "v") > > @@ -3161,6 +3171,34 @@ > > "<VI_unit>" > > "") > > > > +(define_expand "mulv16qi3" > > + [(set (match_operand:V16QI 0 "register_operand" "=v") > > + (mult:V16QI (match_operand:V16QI 1 "register_operand" "v") > > + (match_operand:V16QI 2 "register_operand" "v")))] > > + "TARGET_ALTIVEC" > > + " > > +{ > > + rtx even = gen_reg_rtx (V8HImode); > > + rtx odd = gen_reg_rtx (V8HImode); > > + rtx mask = gen_reg_rtx (V16QImode); > > + rtvec v = rtvec_alloc (16); > > + bool be = BYTES_BIG_ENDIAN; > > + int i; > > + > > + for (i = 0; i < 8; ++i) { > > + RTVEC_ELT (v, 2 * i) > > + = gen_rtx_CONST_INT (QImode, be ? 2 * i + 1 : 31 - 2 * i); > > + RTVEC_ELT (v, 2 * i + 1) > > + = gen_rtx_CONST_INT (QImode, be ? 2 * i + 17 : 15 - 2 * i); > > + } > > + > > + emit_insn (gen_vec_initv16qi (mask, gen_rtx_PARALLEL (V16QImode, v))); > > + emit_insn (gen_altivec_vmulesb (even, operands[1], operands[2])); > > + emit_insn (gen_altivec_vmulosb (odd, operands[1], operands[2])); > > + emit_insn (gen_altivec_vperm_v8hiv16qi (operands[0], even, odd, mask)); > > + DONE; > > +}") > > + > > (define_expand "altivec_negv4sf2" > > [(use (match_operand:V4SF 0 "register_operand" "")) > > (use (match_operand:V4SF 1 "register_operand" ""))] > > Index: gcc/testsuite/gcc.target/powerpc/vec-mult-char-1.c > > =================================================================== > > --- gcc/testsuite/gcc.target/powerpc/vec-mult-char-1.c (revision 0) > > +++ gcc/testsuite/gcc.target/powerpc/vec-mult-char-1.c (working copy) > > @@ -0,0 +1,53 @@ > > +/* { dg-do run { target { powerpc*-*-* && vmx_hw } } } */ > > +/* { dg-require-effective-target powerpc_altivec_ok } */ > > +/* { dg-options "-maltivec" } */ > > + > > +#include <altivec.h> > > + > > +extern void abort (void); > > + > > +vector unsigned char vmului(vector unsigned char v, > > + vector unsigned char i) > > +{ > > + return v * i; > > +} > > + > > +vector signed char vmulsi(vector signed char v, > > + vector signed char i) > > +{ > > + return v * i; > > +} > > + > > +int main () > > +{ > > + vector unsigned char a = {2, 4, 6, 8, 10, 12, 14, 16, > > + 18, 20, 22, 24, 26, 28, 30, 32}; > > + vector unsigned char b = {3, 6, 9, 12, 15, 18, 21, 24, > > + 27, 30, 33, 36, 39, 42, 45, 48}; > > + vector unsigned char c = vmului (a, b); > > + vector unsigned char expect_c = {6, 24, 54, 96, 150, 216, 38, 128, > > + 230, 88, 214, 96, 246, 152, 70, 0}; > > + > > + vector signed char d = {2, -4, 6, -8, 10, -12, 14, -16, > > + 18, -20, 22, -24, 26, -28, 30, -32}; > > + vector signed char e = {3, 6, -9, -12, 15, 18, -21, -24, > > + 27, 30, -33, -36, 39, 42, -45, -48}; > > + vector signed char f = vmulsi (d, e); > > + vector signed char expect_f = {6, -24, -54, 96, -106, 40, -38, -128, > > + -26, -88, 42, 96, -10, 104, -70, 0}; > > + > > + vector signed char g = {127, -128, 126, -126, 125, -125, 124, -124, > > + 123, -123, 122, -122, 121, -121, 120, -120}; > > + vector signed char h = { 2, 2, -2, -2, 127, 127, -128, -128, > > + 10, 10, -10, -10, 64, 65, -64, -65}; > > + vector signed char i = vmulsi (g, h); > > + vector signed char expect_i = {-2, 0, 4, -4, 3, -3, 0, 0, > > + -50, 50, 60, -60, 64, 71, 0, 120}; > > + > > + if (!vec_all_eq (c, expect_c)) > > + abort (); > > + if (!vec_all_eq (f, expect_f)) > > + abort (); > > + if (!vec_all_eq (i, expect_i)) > > + abort (); > > +} > > Index: gcc/testsuite/gcc.target/powerpc/vec-mult-char-2.c > > =================================================================== > > --- gcc/testsuite/gcc.target/powerpc/vec-mult-char-2.c (revision 0) > > +++ gcc/testsuite/gcc.target/powerpc/vec-mult-char-2.c (working copy) > > @@ -0,0 +1,21 @@ > > +/* { dg-do compile { target { powerpc*-*-* && vmx_hw } } } */ > > +/* { dg-require-effective-target powerpc_altivec_ok } */ > > +/* { dg-options "-maltivec" } */ > > + > > +#include <altivec.h> > > + > > +vector unsigned char vmului(vector unsigned char v, > > + vector unsigned char i) > > +{ > > + return v * i; > > +} > > + > > +vector signed char vmulsi(vector signed char v, > > + vector signed char i) > > +{ > > + return v * i; > > +} > > + > > +/* { dg-final { scan-assembler-times "vmulesb" 2 } } */ > > +/* { dg-final { scan-assembler-times "vmulosb" 2 } } */ > > +/* { dg-final { scan-assembler-times "vperm" 2 } } */ > > Index: gcc/testsuite/lib/target-supports.exp > > =================================================================== > > --- gcc/testsuite/lib/target-supports.exp (revision 227416) > > +++ gcc/testsuite/lib/target-supports.exp (working copy) > > @@ -4577,7 +4577,8 @@ proc check_effective_target_vect_char_mult { } { > > if { [istarget aarch64*-*-*] > > || [istarget ia64-*-*] > > || [istarget i?86-*-*] || [istarget x86_64-*-*] > > - || [check_effective_target_arm32] } { > > + || [check_effective_target_arm32] > > + || [check_effective_target_powerpc_altivec] } { > > set et_vect_char_mult_saved 1 > > } > > } > > > > >