Hi, Please find attached the patch that implements absolute difference instructions for the aarch64 target. The patch modifies the testcases vect.c and vect-fp.c to check the generated instructions and also their functionality.
Please review the patch and let me know if any modifications are needed. Regression tested for aarch64-elf on the aarch64-4.7-branch. Thanks, Naveen.H.S gcc/ 2013-01-30 Naveen H.S <naveen.hurugalaw...@caviumnetworks.com> * config/aarch64/aarch64-simd.md (simd_fabd): New Attribute. (abd<mode>_3): New pattern. (aba<mode>_3): New pattern. (fabd<mode>_3): New pattern. gcc/testsuite/ 2013-01-30 Naveen H.S <naveen.hurugalaw...@caviumnetworks.com> * gcc.target/aarch64/vect.c: Test and result vector added for sabd and saba instructions. * gcc.target/aarch64/vect-compile.c: Check for sabd and saba instructions in assembly. * gcc.target/aarch64/vect.x: Add sabd and saba test functions. * gcc.target/aarch64/vect-fp.c: Test and result vector added for fabd instruction. * gcc.target/aarch64/vect-fp-compile.c: Check for fabd instruction in assembly. * gcc.target/aarch64/vect-fp.x: Add fabd test function.
--- gcc/config/aarch64/aarch64-simd.md 2013-01-29 11:37:04.705429514 +0530 +++ gcc/config/aarch64/aarch64-simd.md 2013-01-29 16:58:07.401718855 +0530 @@ -44,6 +44,7 @@ ; simd_dup duplicate element. ; simd_dupgp duplicate general purpose register. ; simd_ext bitwise extract from pair. +; simd_fabd floating absolute difference and accumulate. ; simd_fadd floating point add/sub. ; simd_fcmp floating point compare. ; simd_fcvti floating point convert to integer. @@ -148,6 +149,7 @@ simd_dupgp,\ simd_ext,\ simd_fadd,\ + simd_fabd,\ simd_fcmp,\ simd_fcvti,\ simd_fcvtl,\ @@ -520,6 +522,40 @@ (set_attr "simd_mode" "<MODE>")] ) +(define_insn "abd<mode>_3" + [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w") + (abs:VDQ_BHSI (minus:VDQ_BHSI + (match_operand:VDQ_BHSI 1 "register_operand" "w") + (match_operand:VDQ_BHSI 2 "register_operand" "w"))))] + "TARGET_SIMD" + "sabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" + [(set_attr "simd_type" "simd_abd") + (set_attr "simd_mode" "<MODE>")] +) + +(define_insn "aba<mode>_3" + [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w") + (plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI + (match_operand:VDQ_BHSI 1 "register_operand" "w") + (match_operand:VDQ_BHSI 2 "register_operand" "w"))) + (match_operand:VDQ_BHSI 3 "register_operand" "0")))] + "TARGET_SIMD" + "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" + [(set_attr "simd_type" "simd_abd") + (set_attr "simd_mode" "<MODE>")] +) + +(define_insn "fabd<mode>_3" + [(set (match_operand:VDQF 0 "register_operand" "=w") + (abs:VDQF (minus:VDQF + (match_operand:VDQF 1 "register_operand" "w") + (match_operand:VDQF 2 "register_operand" "w"))))] + "TARGET_SIMD" + "fabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" + [(set_attr "simd_type" "simd_fabd") + (set_attr "simd_mode" "<MODE>")] +) + (define_insn "and<mode>3" [(set (match_operand:VDQ 0 "register_operand" "=w") (and:VDQ (match_operand:VDQ 1 "register_operand" "w") --- gcc/testsuite/gcc.target/aarch64/vect.c 2013-01-24 20:10:09.703833384 +0530 +++ 
gcc/testsuite/gcc.target/aarch64/vect.c 2013-01-30 10:30:05.089505837 +0530 @@ -55,6 +55,8 @@ int main (void) int smin_vector[] = {0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15}; unsigned int umax_vector[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; unsigned int umin_vector[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; + int sabd_vector[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + int saba_vector[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; int reduce_smax_value = 0; int reduce_smin_value = -15; unsigned int reduce_umax_value = 15; @@ -81,6 +83,8 @@ int main (void) TEST (smin, s); TEST (umax, u); TEST (umin, u); + TEST (sabd, s); + TEST (saba, s); TESTV (reduce_smax, s); TESTV (reduce_smin, s); TESTV (reduce_umax, u); --- gcc/testsuite/gcc.target/aarch64/vect-compile.c 2013-01-24 20:10:09.703833384 +0530 +++ gcc/testsuite/gcc.target/aarch64/vect-compile.c 2013-01-29 14:11:16.909568490 +0530 @@ -16,5 +16,7 @@ /* { dg-final { scan-assembler "uminv" } } */ /* { dg-final { scan-assembler "smaxv" } } */ /* { dg-final { scan-assembler "sminv" } } */ +/* { dg-final { scan-assembler "sabd" } } */ +/* { dg-final { scan-assembler "saba" } } */ /* { dg-final { scan-assembler-times "addv" 2} } */ /* { dg-final { scan-assembler-times "addp" 2} } */ --- gcc/testsuite/gcc.target/aarch64/vect-fp.c 2013-01-24 20:10:09.703833384 +0530 +++ gcc/testsuite/gcc.target/aarch64/vect-fp.c 2013-01-30 10:40:23.877491750 +0530 @@ -117,6 +117,16 @@ int main (void) 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0 }; + F32 fabd_F32_vector[] = { 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f }; + + F64 fabd_F64_vector[] = { 1.0, 1.0, 1.0, 1.0, + 1.0, 1.0, 1.0, 1.0, + 1.0, 1.0, 1.0, 1.0, + 1.0, 1.0, 1.0, 1.0 }; + /* Setup input vectors. 
*/ for (i=1; i<=16; i++) { @@ -132,6 +142,7 @@ int main (void) TEST (div, 3); TEST (neg, 2); TEST (abs, 2); + TEST (fabd, 3); return 0; } --- gcc/testsuite/gcc.target/aarch64/vect-fp-compile.c 2013-01-24 20:10:09.703833384 +0530 +++ gcc/testsuite/gcc.target/aarch64/vect-fp-compile.c 2013-01-29 14:11:16.909568490 +0530 @@ -11,3 +11,4 @@ /* { dg-final { scan-assembler "fdiv\\tv" } } */ /* { dg-final { scan-assembler "fneg\\tv" } } */ /* { dg-final { scan-assembler "fabs\\tv" } } */ +/* { dg-final { scan-assembler "fabd\\tv" } } */ --- gcc/testsuite/gcc.target/aarch64/vect-fp.x 2013-01-24 20:10:09.703833384 +0530 +++ gcc/testsuite/gcc.target/aarch64/vect-fp.x 2013-01-29 14:11:16.909568490 +0530 @@ -7,6 +7,16 @@ typedef double *__restrict__ pRF64; extern float fabsf (float); extern double fabs (double); +#define DEF3a(fname, type, op) \ + void fname##_##type (pR##type a, \ + pR##type b, \ + pR##type c) \ + { \ + int i; \ + for (i=0; i<16; i++) \ + a[i] = op (b[i] - c[i]); \ + } + #define DEF3(fname, type, op) \ void fname##_##type (pR##type a, \ pR##type b, \ @@ -27,6 +37,10 @@ extern double fabs (double); } +#define DEFN3a(fname, op) \ + DEF3a (fname, F32, op) \ + DEF3a (fname, F64, op) + #define DEFN3(fname, op) \ DEF3 (fname, F32, op) \ DEF3 (fname, F64, op) @@ -42,3 +56,5 @@ DEFN3 (div, /) DEFN2 (neg, -) DEF2 (abs, F32, fabsf) DEF2 (abs, F64, fabs) +DEF3a (fabd, F32, fabsf) +DEF3a (fabd, F64, fabs) --- gcc/testsuite/gcc.target/aarch64/vect.x 2013-01-24 20:10:09.703833384 +0530 +++ gcc/testsuite/gcc.target/aarch64/vect.x 2013-01-29 18:37:32.321808454 +0530 @@ -138,3 +138,18 @@ long long reduce_add_s64 (pRINT64 a) return s; } + +void sabd (pRINT a, pRINT b, pRINT c) +{ + int i; + for (i=0;i<16;i++) + c[i] = abs (a[i] - b[i]); +} + +void saba (pRINT a, pRINT b, pRINT c) +{ + int i; + for (i=0;i<16;i++) + c[i] += abs (a[i] - b[i]); +} +