On Tue, Jun 7, 2016 at 1:58 AM, Kelvin Nilsen <kdnil...@linux.vnet.ibm.com> wrote: > > This patch adds built-in function support for the ISA 3.0 vabsdub, > vabsduh, and vabsduw instructions. > > I have bootstrapped and tested on powerpc64le-unknown-linux-gnu with no > regressions. Is this ok for the trunk? > > I have also tested against the gcc-6 branch without regressions. Is > this ok for backporting to gcc6 after a few days of burn-in time on the > trunk?
It sounds like these match SAD_EXPR and thus should allow vectorizing gcc.dg/vect/slp-reduc-sad.c and gcc.dg/vect/vect-reduc-sad.c using SAD? Richard. > gcc/testsuite/ChangeLog: > > 2016-06-06 Kelvin Nilsen <kel...@gcc.gnu.org> > > * gcc.target/powerpc/vadsdu-0.c: New test. > * gcc.target/powerpc/vadsdu-1.c: New test. > * gcc.target/powerpc/vadsdu-2.c: New test. > * gcc.target/powerpc/vadsdu-3.c: New test. > * gcc.target/powerpc/vadsdu-4.c: New test. > * gcc.target/powerpc/vadsdu-5.c: New test. > * gcc.target/powerpc/vadsdub-1.c: New test. > * gcc.target/powerpc/vadsdub-2.c: New test. > * gcc.target/powerpc/vadsduh-1.c: New test. > * gcc.target/powerpc/vadsduh-2.c: New test. > * gcc.target/powerpc/vadsduw-1.c: New test. > * gcc.target/powerpc/vadsduw-2.c: New test. > > > gcc/ChangeLog: > > 2016-06-06 Kelvin Nilsen <kel...@gcc.gnu.org> > > * config/rs6000/altivec.h (vec_adu): New macro for vector absolute > difference unsigned. > (vec_adub): New macro for vector absolute difference unsigned > byte. > (vec_aduh): New macro for vector absolute difference unsigned > half-word. > (vec_aduw): New macro for vector absolute difference unsigned word. > * config/rs6000/altivec.md (UNSPEC_VADU): New value. > (vadu<mode>3): New insn. > (*p9_vadu<mode>3): New insn. > * config/rs6000/rs6000-builtin.def (vadub): New built-in > definition. > (vaduh): New built-in definition. > (vaduw): New built-in definition. > (vadu): New overloaded built-in definition. > (vadub): New overloaded built-in definition. > (vaduh): New overloaded built-in definition. > (vaduw): New overloaded built-in definition. > * config/rs6000/rs6000-c.c (altivec_overloaded_builtins): Add > overloaded vector absolute difference unsigned functions. > * doc/extend.texi (PowerPC AltiVec Built-in Functions): Document > the ISA 3.0 vector absolute difference unsigned built-in functions. 
> > Index: gcc/config/rs6000/altivec.h > =================================================================== > --- gcc/config/rs6000/altivec.h (revision 237045) > +++ gcc/config/rs6000/altivec.h (working copy) > @@ -401,6 +401,11 @@ > #define vec_vprtybq __builtin_vec_vprtybq > #endif > > +#define vec_adu __builtin_vec_vadu > +#define vec_adub __builtin_vec_vadub > +#define vec_aduh __builtin_vec_vaduh > +#define vec_aduw __builtin_vec_vaduw > + > #define vec_slv __builtin_vec_vslv > #define vec_srv __builtin_vec_vsrv > #endif > Index: gcc/config/rs6000/altivec.md > =================================================================== > --- gcc/config/rs6000/altivec.md (revision 237045) > +++ gcc/config/rs6000/altivec.md (working copy) > @@ -114,6 +114,7 @@ > UNSPEC_STVLXL > UNSPEC_STVRX > UNSPEC_STVRXL > + UNSPEC_VADU > UNSPEC_VSLV > UNSPEC_VSRV > UNSPEC_VMULWHUB > @@ -3464,6 +3465,25 @@ > [(set_attr "length" "4") > (set_attr "type" "vecsimple")]) > > +;; Vector absolute difference unsigned > +(define_expand "vadu<mode>3" > + [(set (match_operand:VI 0 "register_operand" "") > + (unspec:VI [(match_operand:VI 1 "register_operand" "") > + (match_operand:VI 2 "register_operand" "")] > + UNSPEC_VADU))] > + "TARGET_P9_VECTOR") > + > +;; Vector absolute difference unsigned > +(define_insn "*p9_vadu<mode>3" > + [(set (match_operand:VI 0 "register_operand" "=v") > + (unspec:VI [(match_operand:VI 1 "register_operand" "v") > + (match_operand:VI 2 "register_operand" "v")] > + UNSPEC_VADU))] > + "TARGET_P9_VECTOR" > + "vabsdu<wd> %0, %1, %2" > + [(set_attr "type" "add") > + (set_attr "length" "4")]) > + > ;; Vector count trailing zeros > (define_insn "*p9v_ctz<mode>2" > [(set (match_operand:VI2 0 "register_operand" "=v") > Index: gcc/config/rs6000/rs6000-builtin.def > =================================================================== > --- gcc/config/rs6000/rs6000-builtin.def (revision 237045) > +++ gcc/config/rs6000/rs6000-builtin.def (working copy) > @@ -1757,6 +1757,17 @@ 
BU_P9V_AV_2 (VSRV, "vsrv", > CONST, vsrv) > BU_P9V_OVERLOAD_2 (VSLV, "vslv") > BU_P9V_OVERLOAD_2 (VSRV, "vsrv") > > +/* 2 argument vector functions added in ISA 3.0 (power9). */ > +BU_P9V_AV_2 (VADUB, "vadub", CONST, vaduv16qi3) > +BU_P9V_AV_2 (VADUH, "vaduh", CONST, vaduv8hi3) > +BU_P9V_AV_2 (VADUW, "vaduw", CONST, vaduv4si3) > + > +/* ISA 3.0 vector overloaded 2 argument functions. */ > +BU_P9V_OVERLOAD_2 (VADU, "vadu") > +BU_P9V_OVERLOAD_2 (VADUB, "vadub") > +BU_P9V_OVERLOAD_2 (VADUH, "vaduh") > +BU_P9V_OVERLOAD_2 (VADUW, "vaduw") > + > > /* 2 argument extended divide functions added in ISA 2.06. */ > BU_P7_MISC_2 (DIVWE, "divwe", CONST, dive_si) > Index: gcc/config/rs6000/rs6000-c.c > =================================================================== > --- gcc/config/rs6000/rs6000-c.c (revision 237045) > +++ gcc/config/rs6000/rs6000-c.c (working copy) > @@ -4247,6 +4247,28 @@ const struct altivec_builtin_types altivec_overloa > { P9V_BUILTIN_VEC_VCTZD, P9V_BUILTIN_VCTZD, > RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0, 0 }, > > + { P9V_BUILTIN_VEC_VADU, P9V_BUILTIN_VADUB, > + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, > + RS6000_BTI_unsigned_V16QI, 0 }, > + { P9V_BUILTIN_VEC_VADU, P9V_BUILTIN_VADUH, > + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, > + RS6000_BTI_unsigned_V8HI, 0 }, > + { P9V_BUILTIN_VEC_VADU, P9V_BUILTIN_VADUW, > + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, > + RS6000_BTI_unsigned_V4SI, 0 }, > + > + { P9V_BUILTIN_VEC_VADUB, P9V_BUILTIN_VADUB, > + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, > + RS6000_BTI_unsigned_V16QI, 0 }, > + > + { P9V_BUILTIN_VEC_VADUH, P9V_BUILTIN_VADUH, > + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, > + RS6000_BTI_unsigned_V8HI, 0 }, > + > + { P9V_BUILTIN_VEC_VADUW, P9V_BUILTIN_VADUW, > + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, > + RS6000_BTI_unsigned_V4SI, 0 }, > + > { P8V_BUILTIN_VEC_VGBBD, P8V_BUILTIN_VGBBD, > RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 }, 
> { P8V_BUILTIN_VEC_VGBBD, P8V_BUILTIN_VGBBD, > Index: gcc/doc/extend.texi > =================================================================== > --- gcc/doc/extend.texi (revision 237045) > +++ gcc/doc/extend.texi (working copy) > @@ -17375,6 +17375,31 @@ result returned from the @code{vec_srv} function i > (0x07 & shift_distance[i]))}, > with this resulting value coerced to the @code{unsigned char} type. > > +The following built-in functions are available for the PowerPC family > +of processors, starting with ISA 3.0 or later (@option{-mcpu=power9}) > +or with @option{-mpower9-vector}: > +@smallexample > +__vector unsigned char > +vec_adu (__vector unsigned char arg1, __vector unsigned char arg2); > +__vector unsigned short > +vec_adu (__vector unsigned short arg1, __vector unsigned short arg2); > +__vector unsigned int > +vec_adu (__vector unsigned int arg1, __vector unsigned int arg2); > + > +__vector unsigned char > +vec_adub (__vector unsigned char arg1, __vector unsigned char arg2); > +__vector unsigned short > +vec_aduh (__vector unsigned short arg1, __vector unsigned short arg2); > +__vector unsigned int > +vec_aduw (__vector unsigned int arg1, __vector unsigned int arg2); > +@end smallexample > + > +The @code{vec_adu}, @code{vec_adub}, @code{vec_aduh}, and > +@code{vec_aduw} built-in functions each computes the absolute > +differences of the pairs of vector elements supplied in its two vector > +arguments, placing the absolute differences into the corresponding > +elements of the vector result. > + > If the cryptographic instructions are enabled (@option{-mcrypto} or > @option{-mcpu=power8}), the following builtins are enabled. 
> > Index: gcc/testsuite/gcc.target/powerpc/vadsdu-0.c > =================================================================== > --- gcc/testsuite/gcc.target/powerpc/vadsdu-0.c (revision 0) > +++ gcc/testsuite/gcc.target/powerpc/vadsdu-0.c (working copy) > @@ -0,0 +1,23 @@ > +/* { dg-do compile { target { powerpc*-*-* } } } */ > +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { > "-mcpu=power9" } } */ > +/* { dg-require-effective-target p9vector_hw } */ > +/* { dg-options "-mcpu=power9" } */ > + > +/* This test should succeed on both 32- and 64-bit configurations. */ > +#include <altivec.h> > + > +__vector unsigned int > +doAbsoluteDifferenceUnsignedInt (__vector unsigned int *p, > + __vector unsigned int *q) > +{ > + __vector unsigned int source_1, source_2; > + __vector unsigned int result; > + > + source_1 = *p; > + source_2 = *q; > + > + result = __builtin_vec_vadu (source_1, source_2); > + return result; > +} > + > +/* { dg-final { scan-assembler "vabsduw" } } */ > Index: gcc/testsuite/gcc.target/powerpc/vadsdu-1.c > =================================================================== > --- gcc/testsuite/gcc.target/powerpc/vadsdu-1.c (revision 0) > +++ gcc/testsuite/gcc.target/powerpc/vadsdu-1.c (working copy) > @@ -0,0 +1,22 @@ > +/* { dg-do compile { target { powerpc*-*-* } } } */ > +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { > "-mcpu=power9" } } */ > +/* { dg-require-effective-target p9vector_hw } */ > +/* { dg-options "-mcpu=power9" } */ > + > +/* This test should succeed on both 32- and 64-bit configurations. 
*/ > +#include <altivec.h> > + > +__vector unsigned int > +doAbsoluteDifferenceUnsignedIntMacro (__vector unsigned int *p, > + __vector unsigned int *q) > +{ > + __vector unsigned int result, source_1, source_2; > + > + source_1 = *p; > + source_2 = *q; > + > + result = vec_adu (source_1, source_2); > + return result; > +} > + > +/* { dg-final { scan-assembler "vabsduw" } } */ > Index: gcc/testsuite/gcc.target/powerpc/vadsdu-2.c > =================================================================== > --- gcc/testsuite/gcc.target/powerpc/vadsdu-2.c (revision 0) > +++ gcc/testsuite/gcc.target/powerpc/vadsdu-2.c (working copy) > @@ -0,0 +1,23 @@ > +/* { dg-do compile { target { powerpc*-*-* } } } */ > +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { > "-mcpu=power9" } } */ > +/* { dg-require-effective-target p9vector_hw } */ > +/* { dg-options "-mcpu=power9" } */ > + > +/* This test should succeed on both 32- and 64-bit configurations. */ > +#include <altivec.h> > + > +__vector unsigned short > +doAbsoluteDifferenceUnsignedShort (__vector unsigned short *p, > + __vector unsigned short *q) > +{ > + __vector unsigned short source_1, source_2; > + __vector unsigned short result; > + > + source_1 = *p; > + source_2 = *q; > + > + result = __builtin_vec_vadu (source_1, source_2); > + return result; > +} > + > +/* { dg-final { scan-assembler "vabsduh" } } */ > Index: gcc/testsuite/gcc.target/powerpc/vadsdu-3.c > =================================================================== > --- gcc/testsuite/gcc.target/powerpc/vadsdu-3.c (revision 0) > +++ gcc/testsuite/gcc.target/powerpc/vadsdu-3.c (working copy) > @@ -0,0 +1,22 @@ > +/* { dg-do compile { target { powerpc*-*-* } } } */ > +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { > "-mcpu=power9" } } */ > +/* { dg-require-effective-target p9vector_hw } */ > +/* { dg-options "-mcpu=power9" } */ > + > +/* This test should succeed on both 32- and 64-bit configurations. 
*/ > +#include <altivec.h> > + > +__vector unsigned short > +doAbsoluteDifferenceUnsignedShortMacro (__vector unsigned short *p, > + __vector unsigned short *q) > +{ > + __vector unsigned short result, source_1, source_2; > + > + source_1 = *p; > + source_2 = *q; > + > + result = vec_adu (source_1, source_2); > + return result; > +} > + > +/* { dg-final { scan-assembler "vabsduh" } } */ > Index: gcc/testsuite/gcc.target/powerpc/vadsdu-4.c > =================================================================== > --- gcc/testsuite/gcc.target/powerpc/vadsdu-4.c (revision 0) > +++ gcc/testsuite/gcc.target/powerpc/vadsdu-4.c (working copy) > @@ -0,0 +1,23 @@ > +/* { dg-do compile { target { powerpc*-*-* } } } */ > +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { > "-mcpu=power9" } } */ > +/* { dg-require-effective-target p9vector_hw } */ > +/* { dg-options "-mcpu=power9" } */ > + > +/* This test should succeed on both 32- and 64-bit configurations. */ > +#include <altivec.h> > + > +__vector unsigned char > +doAbsoluteDifferenceUnsignedChar (__vector unsigned char *p, > + __vector unsigned char *q) > +{ > + __vector unsigned char source_1, source_2; > + __vector unsigned char result; > + > + source_1 = *p; > + source_2 = *q; > + > + result = __builtin_vec_vadu (source_1, source_2); > + return result; > +} > + > +/* { dg-final { scan-assembler "vabsdub" } } */ > Index: gcc/testsuite/gcc.target/powerpc/vadsdu-5.c > =================================================================== > --- gcc/testsuite/gcc.target/powerpc/vadsdu-5.c (revision 0) > +++ gcc/testsuite/gcc.target/powerpc/vadsdu-5.c (working copy) > @@ -0,0 +1,22 @@ > +/* { dg-do compile { target { powerpc*-*-* } } } */ > +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { > "-mcpu=power9" } } */ > +/* { dg-require-effective-target p9vector_hw } */ > +/* { dg-options "-mcpu=power9" } */ > + > +/* This test should succeed on both 32- and 64-bit configurations. 
*/ > +#include <altivec.h> > + > +__vector unsigned char > +doAbsoluteDifferenceUnsignedCharMacro (__vector unsigned char *p, > + __vector unsigned char *q) > +{ > + __vector unsigned char result, source_1, source_2; > + > + source_1 = *p; > + source_2 = *q; > + > + result = vec_adu (source_1, source_2); > + return result; > +} > + > +/* { dg-final { scan-assembler "vabsdub" } } */ > Index: gcc/testsuite/gcc.target/powerpc/vadsdub-1.c > =================================================================== > --- gcc/testsuite/gcc.target/powerpc/vadsdub-1.c (revision 0) > +++ gcc/testsuite/gcc.target/powerpc/vadsdub-1.c (working copy) > @@ -0,0 +1,23 @@ > +/* { dg-do compile { target { powerpc*-*-* } } } */ > +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { > "-mcpu=power9" } } */ > +/* { dg-require-effective-target p9vector_hw } */ > +/* { dg-options "-mcpu=power9" } */ > + > +/* This test should succeed on both 32- and 64-bit configurations. */ > +#include <altivec.h> > + > +__vector unsigned char > +doAbsoluteDifferenceUnsigned (__vector unsigned char *p, > + __vector unsigned char *q) > +{ > + __vector unsigned char source_1, source_2; > + __vector unsigned char uc_result; > + > + source_1 = *p; > + source_2 = *q; > + > + uc_result = __builtin_vec_vadub (source_1, source_2); > + return uc_result; > +} > + > +/* { dg-final { scan-assembler "vabsdub" } } */ > Index: gcc/testsuite/gcc.target/powerpc/vadsdub-2.c > =================================================================== > --- gcc/testsuite/gcc.target/powerpc/vadsdub-2.c (revision 0) > +++ gcc/testsuite/gcc.target/powerpc/vadsdub-2.c (working copy) > @@ -0,0 +1,23 @@ > +/* { dg-do compile { target { powerpc*-*-* } } } */ > +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { > "-mcpu=power9" } } */ > +/* { dg-require-effective-target p9vector_hw } */ > +/* { dg-options "-mcpu=power9" } */ > + > +/* This test should succeed on both 32- and 64-bit configurations. 
*/ > +#include <altivec.h> > + > +__vector unsigned char > +doAbsoluteDifferenceUnsigned (__vector unsigned char *p, > + __vector unsigned char *q) > +{ > + __vector unsigned char source_1, source_2; > + __vector unsigned char uc_result; > + > + source_1 = *p; > + source_2 = *q; > + > + uc_result = __builtin_vec_vadub (source_1, source_2); > + return uc_result; > +} > + > +/* { dg-final { scan-assembler "vabsdub" } } */ > Index: gcc/testsuite/gcc.target/powerpc/vadsduh-1.c > =================================================================== > --- gcc/testsuite/gcc.target/powerpc/vadsduh-1.c (revision 0) > +++ gcc/testsuite/gcc.target/powerpc/vadsduh-1.c (working copy) > @@ -0,0 +1,23 @@ > +/* { dg-do compile { target { powerpc*-*-* } } } */ > +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { > "-mcpu=power9" } } */ > +/* { dg-require-effective-target p9vector_hw } */ > +/* { dg-options "-mcpu=power9" } */ > + > +/* This test should succeed on both 32- and 64-bit configurations. 
*/ > +#include <altivec.h> > + > +__vector unsigned short > +doAbsoluteDifferenceUnsigned (__vector unsigned short *p, > + __vector unsigned short *q) > +{ > + __vector unsigned short source_1, source_2; > + __vector unsigned short us_result; > + > + source_1 = *p; > + source_2 = *q; > + > + us_result = __builtin_vec_vaduh (source_1, source_2); > + return us_result; > +} > + > +/* { dg-final { scan-assembler "vabsduh" } } */ > Index: gcc/testsuite/gcc.target/powerpc/vadsduh-2.c > =================================================================== > --- gcc/testsuite/gcc.target/powerpc/vadsduh-2.c (revision 0) > +++ gcc/testsuite/gcc.target/powerpc/vadsduh-2.c (working copy) > @@ -0,0 +1,22 @@ > +/* { dg-do compile { target { powerpc*-*-* } } } */ > +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { > "-mcpu=power9" } } */ > +/* { dg-require-effective-target p9vector_hw } */ > +/* { dg-options "-mcpu=power9" } */ > + > +/* This test should succeed on both 32- and 64-bit configurations. 
*/ > +#include <altivec.h> > + > +__vector unsigned short > +doAbsoluteDifferenceUnsignedMacro (__vector unsigned short *p, > + __vector unsigned short *q) > +{ > + __vector unsigned short result, source_1, source_2; > + > + source_1 = *p; > + source_2 = *q; > + > + result = vec_aduh (source_1, source_2); > + return result; > +} > + > +/* { dg-final { scan-assembler "vabsduh" } } */ > Index: gcc/testsuite/gcc.target/powerpc/vadsduw-1.c > =================================================================== > --- gcc/testsuite/gcc.target/powerpc/vadsduw-1.c (revision 0) > +++ gcc/testsuite/gcc.target/powerpc/vadsduw-1.c (working copy) > @@ -0,0 +1,23 @@ > +/* { dg-do compile { target { powerpc*-*-* } } } */ > +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { > "-mcpu=power9" } } */ > +/* { dg-require-effective-target p9vector_hw } */ > +/* { dg-options "-mcpu=power9" } */ > + > +/* This test should succeed on both 32- and 64-bit configurations. */ > +#include <altivec.h> > + > +__vector unsigned int > +doAbsoluteDifferenceUnsigned (__vector unsigned int *p, > + __vector unsigned int *q) > +{ > + __vector unsigned int source_1, source_2; > + __vector unsigned int ui_result; > + > + source_1 = *p; > + source_2 = *q; > + > + ui_result = __builtin_vec_vaduw (source_1, source_2); > + return ui_result; > +} > + > +/* { dg-final { scan-assembler "vabsduw" } } */ > Index: gcc/testsuite/gcc.target/powerpc/vadsduw-2.c > =================================================================== > --- gcc/testsuite/gcc.target/powerpc/vadsduw-2.c (revision 0) > +++ gcc/testsuite/gcc.target/powerpc/vadsduw-2.c (working copy) > @@ -0,0 +1,22 @@ > +/* { dg-do compile { target { powerpc*-*-* } } } */ > +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { > "-mcpu=power9" } } */ > +/* { dg-require-effective-target p9vector_hw } */ > +/* { dg-options "-mcpu=power9" } */ > + > +/* This test should succeed on both 32- and 64-bit configurations. 
*/ > +#include <altivec.h> > + > +__vector unsigned int > +doAbsoluteDifferenceUnsignedMacro (__vector unsigned int *p, > + __vector unsigned int *q) > +{ > + __vector unsigned int result, source_1, source_2; > + > + source_1 = *p; > + source_2 = *q; > + > + result = vec_aduw (source_1, source_2); > + return result; > +} > + > +/* { dg-final { scan-assembler "vabsduw" } } */ >