https://gcc.gnu.org/g:a240c5bef969b80dad6c556794e2f61ebf956272
commit a240c5bef969b80dad6c556794e2f61ebf956272 Author: Michael Meissner <meiss...@linux.ibm.com> Date: Thu Oct 17 14:18:38 2024 -0400 Add sha3 tests; Fix bugs. 2024-10-17 Michael Meissner <meiss...@linux.ibm.com> gcc/ * config/rs6000/fusion.md: Regenerate. * config/rs6000/genfusion.pl (gen_logical_addsubf): Remove two incorrect xxeval fusion patterns. gcc/testsuite/ * gcc.target/powerpc/xxeval-1.c: New test * gcc.target/powerpc/xxeval-2.c: Likewise. Diff: --- gcc/config/rs6000/fusion.md | 34 +- gcc/config/rs6000/genfusion.pl | 2 - gcc/testsuite/gcc.target/powerpc/xxeval-1.c | 409 ++++++++++++ gcc/testsuite/gcc.target/powerpc/xxeval-2.c | 936 ++++++++++++++++++++++++++++ 4 files changed, 1359 insertions(+), 22 deletions(-) diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md index 724e4692d101..6b5830908a51 100644 --- a/gcc/config/rs6000/fusion.md +++ b/gcc/config/rs6000/fusion.md @@ -2150,23 +2150,20 @@ ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vorc -> vandc (define_insn "*fuse_vorc_vandc" - [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,v,wa") - (and:VM (ior:VM (not:VM (match_operand:VM 0 "vector_fusion_operand" "v,v,v,v,wa")) - (match_operand:VM 1 "vector_fusion_operand" "v,v,v,v,wa")) - (not:VM (match_operand:VM 2 "vector_fusion_operand" "v,v,v,v,wa")))) - (clobber (match_scratch:VM 4 "=X,X,X,&v,X"))] + [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v") + (and:VM (ior:VM (not:VM (match_operand:VM 0 "altivec_register_operand" "v,v,v,v")) + (match_operand:VM 1 "altivec_register_operand" "v,v,v,v")) + (not:VM (match_operand:VM 2 "altivec_register_operand" "v,v,v,v")))) + (clobber (match_scratch:VM 4 "=X,X,X,&v"))] "(TARGET_P10_FUSION)" "@ vorc %3,%1,%0\;vandc %3,%3,%2 vorc %3,%1,%0\;vandc %3,%3,%2 vorc %3,%1,%0\;vandc %3,%3,%2 - vorc %4,%1,%0\;vandc %3,%4,%2 - xxeval %x3,%x2,%x1,%x0,4" + vorc %4,%1,%0\;vandc %3,%4,%2" [(set_attr "type" "fused_vector") (set_attr "cost" "6") - 
(set_attr "length" "8") - (set_attr "prefixed" "*,*,*,*,yes") - (set_attr "isa" "*,*,*,*,xxeval")]) + (set_attr "length" "8")]) ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vxor -> vandc @@ -2861,23 +2858,20 @@ ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vandc -> vorc (define_insn "*fuse_vandc_vorc" - [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,v,wa") - (ior:VM (and:VM (not:VM (match_operand:VM 0 "vector_fusion_operand" "v,v,v,v,wa")) - (match_operand:VM 1 "vector_fusion_operand" "v,v,v,v,wa")) - (not:VM (match_operand:VM 2 "vector_fusion_operand" "v,v,v,v,wa")))) - (clobber (match_scratch:VM 4 "=X,X,X,&v,X"))] + [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v") + (ior:VM (and:VM (not:VM (match_operand:VM 0 "altivec_register_operand" "v,v,v,v")) + (match_operand:VM 1 "altivec_register_operand" "v,v,v,v")) + (not:VM (match_operand:VM 2 "altivec_register_operand" "v,v,v,v")))) + (clobber (match_scratch:VM 4 "=X,X,X,&v"))] "(TARGET_P10_FUSION)" "@ vandc %3,%1,%0\;vorc %3,%3,%2 vandc %3,%1,%0\;vorc %3,%3,%2 vandc %3,%1,%0\;vorc %3,%3,%2 - vandc %4,%1,%0\;vorc %3,%4,%2 - xxeval %x3,%x2,%x1,%x0,223" + vandc %4,%1,%0\;vorc %3,%4,%2" [(set_attr "type" "fused_vector") (set_attr "cost" "6") - (set_attr "length" "8") - (set_attr "prefixed" "*,*,*,*,yes") - (set_attr "isa" "*,*,*,*,xxeval")]) + (set_attr "length" "8")]) ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector veqv -> vorc diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl index 57ae80ace51c..89c479f32e8f 100755 --- a/gcc/config/rs6000/genfusion.pl +++ b/gcc/config/rs6000/genfusion.pl @@ -217,7 +217,6 @@ sub gen_logical_addsubf my %xxeval_fusions = ( "vand_vand" => 1, "vandc_vand" => 2, - "vorc_vandc" => 4, "vxor_vand" => 6, "vor_vand" => 7, "vnor_vand" => 8, @@ -249,7 +248,6 @@ sub gen_logical_addsubf "vorc_vor" => 191, "vandc_vnor" => 208, "vandc_veqv" => 210, - 
"vandc_vorc" => 223, "vand_vnor" => 224, "vnand_vxor" => 225, "vnand_vor" => 239, diff --git a/gcc/testsuite/gcc.target/powerpc/xxeval-1.c b/gcc/testsuite/gcc.target/powerpc/xxeval-1.c new file mode 100644 index 000000000000..28e0874b3454 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/xxeval-1.c @@ -0,0 +1,409 @@ +/* { dg-do run } */ +/* { dg-require-effective-target power10_hw } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ + +/* Generate and check most of the vector logical instruction combinations that + may or may not generate xxeval to do a fused operation on power10. */ + +#include <stddef.h> +#include <stdlib.h> +#include <altivec.h> + +#ifdef DEBUG +#include <stdio.h> + +static int errors = 0; +static int tests = 0; +#endif + +typedef vector unsigned int vector_t; +typedef unsigned int scalar_t; + +/* Vector logical functions. */ +static inline vector_t +vector_and (vector_t x, vector_t y) +{ + return x & y; +} + +static inline vector_t +vector_or (vector_t x, vector_t y) +{ + return x | y; +} + +static inline vector_t +vector_xor (vector_t x, vector_t y) +{ + return x ^ y; +} + +static inline vector_t +vector_andc (vector_t x, vector_t y) +{ + return x & ~y; +} + +static inline vector_t +vector_orc (vector_t x, vector_t y) +{ + return x | ~y; +} + +static inline vector_t +vector_nand (vector_t x, vector_t y) +{ + return ~(x & y); +} + +static inline vector_t +vector_nor (vector_t x, vector_t y) +{ + return ~(x | y); +} + +static inline vector_t +vector_eqv (vector_t x, vector_t y) +{ + return ~(x ^ y); +} + +/* Scalar logical functions. 
*/ +static inline scalar_t +scalar_and (scalar_t x, scalar_t y) +{ + return x & y; +} + +static inline scalar_t +scalar_or (scalar_t x, scalar_t y) +{ + return x | y; +} + +static inline scalar_t +scalar_xor (scalar_t x, scalar_t y) +{ + return x ^ y; +} + +static inline scalar_t +scalar_andc (scalar_t x, scalar_t y) +{ + return x & ~y; +} + +static inline scalar_t +scalar_orc (scalar_t x, scalar_t y) +{ + return x | ~y; +} + +static inline scalar_t +scalar_nand (scalar_t x, scalar_t y) +{ + return ~(x & y); +} + +static inline scalar_t +scalar_nor (scalar_t x, scalar_t y) +{ + return ~(x | y); +} + +static inline scalar_t +scalar_eqv (scalar_t x, scalar_t y) +{ + return ~(x ^ y); +} + + +/* + * Generate one function for each combination that we are checking. Do 4 + * operations: + * + * Use FPR regs that should generate either XXEVAL or XXL* insns; + * Use Altivec registers that may generate fused V* insns; + * Use VSX registers, ensuring fusing is not done via asm; (and) + * Use GPR registers on scalar operations. 
+ */ + +#ifdef DEBUG +#define TRACE(INNER, OUTER) \ + do { \ + tests++; \ + printf ("%s_%s\n", INNER, OUTER); \ + fflush (stdout); \ + } while (0) \ + +#define FAILED(INNER, OUTER) \ + do { \ + errors++; \ + printf ("%s_%s failed\n", INNER, OUTER); \ + fflush (stdout); \ + } while (0) \ + +#else +#define TRACE(INNER, OUTER) +#define FAILED(INNER, OUTER) abort () +#endif + +#define FUSED_FUNC(INNER, OUTER) \ +static void \ +INNER ## _ ## OUTER (vector_t a, vector_t b, vector_t c) \ +{ \ + vector_t f_a, f_b, f_c, f_r, f_t; \ + vector_t v_a, v_b, v_c, v_r, v_t; \ + vector_t w_a, w_b, w_c, w_r, w_t; \ + scalar_t s_a, s_b, s_c, s_r, s_t; \ + \ + TRACE (#INNER, #OUTER); \ + \ + f_a = a; \ + f_b = b; \ + f_c = c; \ + \ + __asm__ (" # fpr regs: %x0,%x1,%x2 " #INNER "_" #OUTER \ + : "+d" (f_a), \ + "+d" (f_b), \ + "+d" (f_c)); \ + \ + f_t = vector_ ## INNER (f_b, f_c); \ + f_r = vector_ ## OUTER (f_a, f_t); \ + \ + __asm__ (" # fpr regs result: %x0 " #INNER "_" #OUTER \ + : "+d" (f_r)); \ + \ + v_a = a; \ + v_b = b; \ + v_c = c; \ + \ + __asm__ (" # altivec regs: %x0,%x1,%x2 " #INNER "_" #OUTER \ + : "+v" (v_a), \ + "+v" (v_b), \ + "+v" (v_c)); \ + \ + v_t = vector_ ## INNER (v_b, v_c); \ + v_r = vector_ ## OUTER (v_a, v_t); \ + \ + __asm__ (" # altivec regs result: %x0 " #INNER "_" #OUTER \ + : "+v" (v_r)); \ + \ + w_a = a; \ + w_b = b; \ + w_c = c; \ + \ + __asm__ (" # vsx regs: %x0,%x1,%x2 " #INNER "_" #OUTER \ + : "+wa" (w_a), \ + "+wa" (w_b), \ + "+wa" (w_c)); \ + \ + w_t = vector_ ## INNER (w_b, w_c); \ + __asm__ ("nop # break vsx fusion reg %x0" : "+wa" (w_t)); \ + w_r = vector_ ## OUTER (w_a, w_t); \ + \ + __asm__ (" # vsx regs result: %x0 " #INNER "_" #OUTER \ + : "+wa" (w_r)); \ + \ + s_a = a[0]; \ + s_b = b[0]; \ + s_c = c[0]; \ + \ + __asm__ (" # gpr regs: %0,%1,%2 " #INNER "_" #OUTER \ + : "+r" (s_a), \ + "+r" (s_b), \ + "+r" (s_c)); \ + \ + s_t = scalar_ ## INNER (s_b, s_c); \ + s_r = scalar_ ## OUTER (s_a, s_t); \ + \ + __asm__ (" # gpr regs result: %0 " 
#INNER "_" #OUTER \ + : "+r" (s_r)); \ + \ + if (!vec_all_eq (w_r, f_r) \ + || !vec_all_eq (w_r, v_r) \ + || s_r != w_r[0]) \ + FAILED (#INNER, #OUTER); \ + \ + return; \ +} + +FUSED_FUNC (and, and) +FUSED_FUNC (andc, and) +FUSED_FUNC (eqv, and) +FUSED_FUNC (nand, and) +FUSED_FUNC (nor, and) +FUSED_FUNC (or, and) +FUSED_FUNC (orc, and) +FUSED_FUNC (xor, and) + +FUSED_FUNC (and, andc) +FUSED_FUNC (andc, andc) +FUSED_FUNC (eqv, andc) +FUSED_FUNC (nand, andc) +FUSED_FUNC (nor, andc) +FUSED_FUNC (or, andc) +FUSED_FUNC (orc, andc) +FUSED_FUNC (xor, andc) + +FUSED_FUNC (and, eqv) +FUSED_FUNC (andc, eqv) +FUSED_FUNC (eqv, eqv) +FUSED_FUNC (nand, eqv) +FUSED_FUNC (nor, eqv) +FUSED_FUNC (or, eqv) +FUSED_FUNC (orc, eqv) +FUSED_FUNC (xor, eqv) + +FUSED_FUNC (and, nand) +FUSED_FUNC (andc, nand) +FUSED_FUNC (eqv, nand) +FUSED_FUNC (nand, nand) +FUSED_FUNC (nor, nand) +FUSED_FUNC (or, nand) +FUSED_FUNC (orc, nand) +FUSED_FUNC (xor, nand) + +FUSED_FUNC (and, nor) +FUSED_FUNC (andc, nor) +FUSED_FUNC (eqv, nor) +FUSED_FUNC (nand, nor) +FUSED_FUNC (nor, nor) +FUSED_FUNC (or, nor) +FUSED_FUNC (orc, nor) +FUSED_FUNC (xor, nor) + +FUSED_FUNC (and, or) +FUSED_FUNC (andc, or) +FUSED_FUNC (eqv, or) +FUSED_FUNC (nand, or) +FUSED_FUNC (nor, or) +FUSED_FUNC (or, or) +FUSED_FUNC (orc, or) +FUSED_FUNC (xor, or) + +FUSED_FUNC (and, orc) +FUSED_FUNC (andc, orc) +FUSED_FUNC (eqv, orc) +FUSED_FUNC (nand, orc) +FUSED_FUNC (nor, orc) +FUSED_FUNC (or, orc) +FUSED_FUNC (orc, orc) +FUSED_FUNC (xor, orc) + +FUSED_FUNC (and, xor) +FUSED_FUNC (andc, xor) +FUSED_FUNC (eqv, xor) +FUSED_FUNC (nand, xor) +FUSED_FUNC (nor, xor) +FUSED_FUNC (or, xor) +FUSED_FUNC (orc, xor) +FUSED_FUNC (xor, xor) + + +/* List of functions to check. 
*/ +typedef void func_t (vector_t, + vector_t, + vector_t); + +typedef func_t *ptr_func_t; + +static ptr_func_t functions[] = { + and_and, + andc_and, + eqv_and, + nand_and, + nor_and, + or_and, + orc_and, + xor_and, + + and_andc, + andc_andc, + eqv_andc, + nand_andc, + nor_andc, + or_andc, + orc_andc, + xor_andc, + + and_eqv, + andc_eqv, + eqv_eqv, + nand_eqv, + nor_eqv, + or_eqv, + orc_eqv, + xor_eqv, + + and_nand, + andc_nand, + eqv_nand, + nand_nand, + nor_nand, + or_nand, + orc_nand, + xor_nand, + + and_nor, + andc_nor, + eqv_nor, + nand_nor, + nor_nor, + or_nor, + orc_nor, + xor_nor, + + and_or, + andc_or, + eqv_or, + nand_or, + nor_or, + or_or, + orc_or, + xor_or, + + and_orc, + andc_orc, + eqv_orc, + nand_orc, + nor_orc, + or_orc, + orc_orc, + xor_orc, + + and_xor, + andc_xor, + eqv_xor, + nand_xor, + nor_xor, + or_xor, + orc_xor, + xor_xor, +}; + + +int +main (void) +{ + scalar_t s_a = 0x0fu; + scalar_t s_b = 0xaau; + scalar_t s_c = 0xccu; + + vector_t a = (vector_t) { s_a, s_a, ~s_a, ~s_a }; + vector_t b = (vector_t) { s_b, ~s_b, s_b, ~s_b }; + vector_t c = (vector_t) { s_c, ~s_c, ~s_c, s_c }; + + size_t i; + + for (i = 0; i < sizeof (functions) / sizeof (functions[0]); i++) + functions[i] (a, b, c); + +#ifdef DEBUG + printf ("Done, %d tests, %d failures\n", tests, errors); + return errors; + +#else + return 0; +#endif +} diff --git a/gcc/testsuite/gcc.target/powerpc/xxeval-2.c b/gcc/testsuite/gcc.target/powerpc/xxeval-2.c new file mode 100644 index 000000000000..f074622c9f67 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/xxeval-2.c @@ -0,0 +1,936 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ + +/* Make sure all of the fusion cases that generate the xxeval instruction + actually generate it. 
*/ +typedef vector unsigned int vector_t; + +static inline vector_t +vector_and (vector_t x, vector_t y) +{ + return x & y; +} + +static inline vector_t +vector_or (vector_t x, vector_t y) +{ + return x | y; +} + +static inline vector_t +vector_xor (vector_t x, vector_t y) +{ + return x ^ y; +} + +static inline vector_t +vector_andc (vector_t x, vector_t y) +{ + return x & ~y; +} + +static inline vector_t +vector_orc (vector_t x, vector_t y) +{ + return x | ~y; +} + +static inline vector_t +vector_nand (vector_t x, vector_t y) +{ + return ~(x & y); +} + +static inline vector_t +vector_nor (vector_t x, vector_t y) +{ + return ~(x | y); +} + +static inline vector_t +vector_eqv (vector_t x, vector_t y) +{ + return ~(x ^ y); +} + +void +and_and (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,1. */ + r = vector_and (a, vector_and (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +and_andc (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,14. */ + r = vector_andc (a, vector_and (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +and_or (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,31. 
*/ + r = vector_or (a, vector_and (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +and_orc (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,239. */ + r = vector_orc (a, vector_and (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +and_xor (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,30. */ + r = vector_xor (a, vector_and (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +andc_and (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,14. */ + r = vector_andc (a, vector_and (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +andc_andc (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,11. */ + r = vector_andc (a, vector_andc (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +andc_eqv (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,210. 
*/ + r = vector_eqv (a, vector_andc (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +andc_nand (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,239. */ + r = vector_nand (a, vector_andc (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +andc_or (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,47. */ + r = vector_or (a, vector_andc (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +andc_orc (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,191. */ + r = vector_orc (a, vector_andc (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +andc_xor (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,45. */ + r = vector_xor (a, vector_andc (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +eqv_and (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,9. 
*/ + r = vector_and (a, vector_eqv (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +eqv_andc (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,210. */ + r = vector_eqv (a, vector_andc (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +eqv_eqv (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,105. */ + r = vector_eqv (a, vector_eqv (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +eqv_or (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,159. */ + r = vector_or (a, vector_eqv (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +eqv_orc (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,111. */ + r = vector_orc (a, vector_eqv (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +nand_and (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,14. 
*/ + r = vector_and (a, vector_nand (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +nand_andc (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,1. */ + r = vector_andc (a, vector_nand (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +nand_eqv (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,30. */ + r = vector_eqv (a, vector_nand (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +nand_or (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,2. */ + r = vector_nor (a, vector_nand (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +nand_orc (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,31. */ + r = vector_orc (a, vector_nand (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +nor_and (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,8. 
*/ + r = vector_and (a, vector_nor (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +nor_andc (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,7. */ + r = vector_andc (a, vector_nor (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +nor_eqv (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,120. */ + r = vector_eqv (a, vector_nor (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +nor_nand (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,191. */ + r = vector_nand (a, vector_nor (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +nor_or (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,143. */ + r = vector_or (a, vector_nor (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +nor_orc (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,127. 
*/ + r = vector_orc (a, vector_nor (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +or_and (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,7. */ + r = vector_and (a, vector_or (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +or_andc (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,8. */ + r = vector_andc (a, vector_or (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +or_or (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,127. */ + r = vector_or (a, vector_or (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +or_orc (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,143. */ + r = vector_orc (a, vector_or (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +or_xor (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,120. 
*/ + r = vector_xor (a, vector_or (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +orc_and (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,11. */ + r = vector_and (a, vector_orc (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +orc_andc (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,2. */ + r = vector_andc (a, vector_orc (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +orc_eqv (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,75. */ + r = vector_eqv (a, vector_orc (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +orc_nor (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,8. */ + r = vector_nor (a, vector_orc (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +orc_or (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,191. 
*/ + r = vector_or (a, vector_orc (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +orc_orc (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,47. */ + r = vector_orc (a, vector_orc (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +orc_xor (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,180. */ + r = vector_xor (a, vector_orc (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +xor_and (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,6. */ + r = vector_and (a, vector_xor (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +xor_andc (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,9. */ + r = vector_andc (a, vector_xor (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +xor_nand (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,249. 
*/ + r = vector_nand (a, vector_xor (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +xor_or (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,111. */ + r = vector_or (a, vector_xor (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +xor_orc (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,159. */ + r = vector_orc (a, vector_xor (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +void +xor_xor (vector_t *p_a, vector_t *p_b, vector_t *p_c, vector_t *p_r) +{ + vector_t a = *p_a; + vector_t b = *p_b; + vector_t c = *p_c; + vector_t r; + + __asm__ (" # force fpr registers, %x0,%x1,%x2" + : "+d" (a), "+d" (b), "+d" (c)); + + /* xxeval r,a,b,c,105. */ + r = vector_xor (a, vector_xor (b, c)); + + __asm__ (" # force fpr result, %x0" : "+d" (r)); + *p_r = r; + return; +} + +/* Make sure none of the traditional logical instructions are generated. Skip + checking for xxlor in case the register allocator decides to add some vector + moves. */ +/* { dg-final { scan-assembler-not {\mv(and|or|xor|andc|orc|nand|nor|eqv)\M} } } */ +/* { dg-final { scan-assembler-not {\mxxl(and|xor|andc|orc|nand|nor|eqv)\M} } } */ +/* { dg-final { scan-assembler-times {\mxxeval\M} 46 } } */