Hello, This patch adds support for the AdvSIMD CLZ instruction and adds tests for it. Regression testing was done for aarch64-none-elf with no issues.
OK? Regards VP --- gcc/ChangeLog 2013-05-22 Vidya Praveen <vidyaprav...@arm.com> * config/aarch64/aarch64-simd.md (clzv4si2): Support for CLZ instruction (AdvSIMD). * config/aarch64/aarch64-builtins.c (aarch64_builtin_vectorized_function): Handler for BUILT_IN_CLZ. * config/aarch64/aarch64-simd-builtins.def: Entry for CLZ. * testsuite/gcc.target/aarch64/vect-clz.c: New file.
diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c index 4fdfe24..2a0e5fd 100644 --- a/gcc/config/aarch64/aarch64-builtins.c +++ b/gcc/config/aarch64/aarch64-builtins.c @@ -1245,6 +1245,16 @@ aarch64_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in) return AARCH64_FIND_FRINT_VARIANT (sqrt); #undef AARCH64_CHECK_BUILTIN_MODE #define AARCH64_CHECK_BUILTIN_MODE(C, N) \ + (out_mode == SImode && out_n == C \ + && in_mode == N##Imode && in_n == C) + case BUILT_IN_CLZ: + { + if (AARCH64_CHECK_BUILTIN_MODE (4, S)) + return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_clzv4si]; + return NULL_TREE; + } +#undef AARCH64_CHECK_BUILTIN_MODE +#define AARCH64_CHECK_BUILTIN_MODE(C, N) \ (out_mode == N##Imode && out_n == C \ && in_mode == N##Fmode && in_n == C) case BUILT_IN_LFLOOR: diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def index e420173..5134f96 100644 --- a/gcc/config/aarch64/aarch64-simd-builtins.def +++ b/gcc/config/aarch64/aarch64-simd-builtins.def @@ -49,6 +49,7 @@ BUILTIN_VDQF (UNOP, sqrt, 2) BUILTIN_VD_BHSI (BINOP, addp, 0) VAR1 (UNOP, addp, 0, di) + VAR1 (UNOP, clz, 2, v4si) BUILTIN_VD_RE (REINTERP, reinterpretdi, 0) BUILTIN_VDC (REINTERP, reinterpretv8qi, 0) diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 9069a73..82fe1ad 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -1611,6 +1611,15 @@ DONE; }) +(define_insn "clz<mode>2" + [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w") + (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))] + "TARGET_SIMD" + "clz\\t%0.<Vtype>, %1.<Vtype>" + [(set_attr "simd_type" "simd_cls") + (set_attr "simd_mode" "<MODE>")] +) + ;; 'across lanes' max and min ops. 
(define_insn "reduc_<maxmin_uns>_<mode>" diff --git a/gcc/testsuite/gcc.target/aarch64/vect-clz.c b/gcc/testsuite/gcc.target/aarch64/vect-clz.c new file mode 100644 index 0000000..8f1fe70 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/vect-clz.c @@ -0,0 +1,35 @@ +/* { dg-do run } */ +/* { dg-options "-O3 -save-temps -fno-inline" } */ + +extern void abort (); + +void +count_lz_v4si (unsigned *__restrict a, int *__restrict b) +{ + int i; + + for (i = 0; i < 4; i++) + b[i] = __builtin_clz (a[i]); +} + +/* { dg-final { scan-assembler "clz\tv\[0-9\]+\.4s" } } */ + +int +main () +{ + unsigned int x[4] = { 0x0, 0xFFFF, 0x1FFFF, 0xFFFFFFFF }; + int r[4] = { 32, 16, 15, 0 }; + int d[4], i; + + count_lz_v4si (x, d); + + for (i = 0; i < 4; i++) + { + if (d[i] != r[i]) + abort (); + } + + return 0; +} + +/* { dg-final { cleanup-saved-temps } } */