This patch adds AArch64 patterns for converting between 64-bit and 128-bit integer vectors, and makes the vectoriser and expand pass use them.
2019-10-24 Richard Sandiford <richard.sandif...@arm.com> gcc/ * tree-vect-stmts.c (vectorizable_conversion): Extend the non-widening and non-narrowing path to handle standard conversion codes, if the target supports them. * expr.c (convert_move): Try using the extend and truncate optabs for vectors. * optabs-tree.c (supportable_convert_operation): Likewise. * config/aarch64/iterators.md (Vnarrowq): New mode attribute. * config/aarch64/aarch64-simd.md (<optab><Vnarrowq><mode>2) (trunc<mode><Vnarrowq>2): New patterns. gcc/testsuite/ * gcc.dg/vect/no-scevccp-outer-12.c: Expect the test to pass on aarch64 targets. * gcc.dg/vect/vect-double-reduc-5.c: Likewise. * gcc.dg/vect/vect-outer-4e.c: Likewise. * gcc.target/aarch64/vect_mixed_sizes_5.c: New test. * gcc.target/aarch64/vect_mixed_sizes_6.c: Likewise. * gcc.target/aarch64/vect_mixed_sizes_7.c: Likewise. * gcc.target/aarch64/vect_mixed_sizes_8.c: Likewise. * gcc.target/aarch64/vect_mixed_sizes_9.c: Likewise. * gcc.target/aarch64/vect_mixed_sizes_10.c: Likewise. * gcc.target/aarch64/vect_mixed_sizes_11.c: Likewise. * gcc.target/aarch64/vect_mixed_sizes_12.c: Likewise. * gcc.target/aarch64/vect_mixed_sizes_13.c: Likewise. 
Index: gcc/tree-vect-stmts.c =================================================================== --- gcc/tree-vect-stmts.c 2019-10-25 13:27:32.877640367 +0100 +++ gcc/tree-vect-stmts.c 2019-10-25 13:27:36.197616908 +0100 @@ -4861,7 +4861,9 @@ vectorizable_conversion (stmt_vec_info s switch (modifier) { case NONE: - if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR) + if (code != FIX_TRUNC_EXPR + && code != FLOAT_EXPR + && !CONVERT_EXPR_CODE_P (code)) return false; if (supportable_convert_operation (code, vectype_out, vectype_in, &decl1, &code1)) Index: gcc/expr.c =================================================================== --- gcc/expr.c 2019-10-22 08:46:57.359355939 +0100 +++ gcc/expr.c 2019-10-25 13:27:36.193616936 +0100 @@ -250,6 +250,31 @@ convert_move (rtx to, rtx from, int unsi if (VECTOR_MODE_P (to_mode) || VECTOR_MODE_P (from_mode)) { + if (GET_MODE_UNIT_PRECISION (to_mode) + > GET_MODE_UNIT_PRECISION (from_mode)) + { + optab op = unsignedp ? zext_optab : sext_optab; + insn_code icode = convert_optab_handler (op, to_mode, from_mode); + if (icode != CODE_FOR_nothing) + { + emit_unop_insn (icode, to, from, + unsignedp ? 
ZERO_EXTEND : SIGN_EXTEND); + return; + } + } + + if (GET_MODE_UNIT_PRECISION (to_mode) + < GET_MODE_UNIT_PRECISION (from_mode)) + { + insn_code icode = convert_optab_handler (trunc_optab, + to_mode, from_mode); + if (icode != CODE_FOR_nothing) + { + emit_unop_insn (icode, to, from, TRUNCATE); + return; + } + } + gcc_assert (known_eq (GET_MODE_BITSIZE (from_mode), GET_MODE_BITSIZE (to_mode))); Index: gcc/optabs-tree.c =================================================================== --- gcc/optabs-tree.c 2019-10-08 09:23:31.894529571 +0100 +++ gcc/optabs-tree.c 2019-10-25 13:27:36.193616936 +0100 @@ -303,6 +303,20 @@ supportable_convert_operation (enum tree return true; } + if (GET_MODE_UNIT_PRECISION (m1) > GET_MODE_UNIT_PRECISION (m2) + && can_extend_p (m1, m2, TYPE_UNSIGNED (vectype_in))) + { + *code1 = code; + return true; + } + + if (GET_MODE_UNIT_PRECISION (m1) < GET_MODE_UNIT_PRECISION (m2) + && convert_optab_handler (trunc_optab, m1, m2) != CODE_FOR_nothing) + { + *code1 = code; + return true; + } + /* Now check for builtin. */ if (targetm.vectorize.builtin_conversion && targetm.vectorize.builtin_conversion (code, vectype_out, vectype_in)) Index: gcc/config/aarch64/iterators.md =================================================================== --- gcc/config/aarch64/iterators.md 2019-10-17 14:23:07.711222242 +0100 +++ gcc/config/aarch64/iterators.md 2019-10-25 13:27:36.189616964 +0100 @@ -860,6 +860,8 @@ (define_mode_attr VNARROWQ [(V8HI "V8QI" (V2DI "V2SI") (DI "SI") (SI "HI") (HI "QI")]) +(define_mode_attr Vnarrowq [(V8HI "v8qi") (V4SI "v4hi") + (V2DI "v2si")]) ;; Narrowed quad-modes for VQN (Used for XTN2). 
(define_mode_attr VNARROWQ2 [(V8HI "V16QI") (V4SI "V8HI") Index: gcc/config/aarch64/aarch64-simd.md =================================================================== --- gcc/config/aarch64/aarch64-simd.md 2019-08-25 19:10:35.550157075 +0100 +++ gcc/config/aarch64/aarch64-simd.md 2019-10-25 13:27:36.189616964 +0100 @@ -7007,3 +7007,21 @@ (define_insn "aarch64_crypto_pmullv2di" "pmull2\\t%0.1q, %1.2d, %2.2d" [(set_attr "type" "crypto_pmull")] ) + +;; Sign- or zero-extend a 64-bit integer vector to a 128-bit vector. +(define_insn "<optab><Vnarrowq><mode>2" + [(set (match_operand:VQN 0 "register_operand" "=w") + (ANY_EXTEND:VQN (match_operand:<VNARROWQ> 1 "register_operand" "w")))] + "TARGET_SIMD" + "<su>xtl\t%0.<Vtype>, %1.<Vntype>" + [(set_attr "type" "neon_shift_imm_long")] +) + +;; Truncate a 128-bit integer vector to a 64-bit vector. +(define_insn "trunc<mode><Vnarrowq>2" + [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w") + (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))] + "TARGET_SIMD" + "xtn\t%0.<Vntype>, %1.<Vtype>" + [(set_attr "type" "neon_shift_imm_narrow_q")] +) Index: gcc/testsuite/gcc.dg/vect/no-scevccp-outer-12.c =================================================================== --- gcc/testsuite/gcc.dg/vect/no-scevccp-outer-12.c 2019-03-08 18:15:02.252871290 +0000 +++ gcc/testsuite/gcc.dg/vect/no-scevccp-outer-12.c 2019-10-25 13:27:36.193616936 +0100 @@ -46,4 +46,4 @@ int main (void) } /* Until we support multiple types in the inner loop */ -/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1 "vect" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1 "vect" { xfail { ! 
aarch64*-*-* } } } } */ Index: gcc/testsuite/gcc.dg/vect/vect-double-reduc-5.c =================================================================== --- gcc/testsuite/gcc.dg/vect/vect-double-reduc-5.c 2019-03-08 18:15:02.244871320 +0000 +++ gcc/testsuite/gcc.dg/vect/vect-double-reduc-5.c 2019-10-25 13:27:36.193616936 +0100 @@ -52,5 +52,5 @@ int main () /* Vectorization of loops with multiple types and double reduction is not supported yet. */ -/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail { ! aarch64*-*-* } } } } */ Index: gcc/testsuite/gcc.dg/vect/vect-outer-4e.c =================================================================== --- gcc/testsuite/gcc.dg/vect/vect-outer-4e.c 2019-03-08 18:15:02.264871246 +0000 +++ gcc/testsuite/gcc.dg/vect/vect-outer-4e.c 2019-10-25 13:27:36.193616936 +0100 @@ -23,4 +23,4 @@ foo (){ return; } -/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail { ! 
aarch64*-*-* } } } } */ Index: gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_5.c =================================================================== --- /dev/null 2019-09-17 11:41:18.176664108 +0100 +++ gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_5.c 2019-10-25 13:27:36.193616936 +0100 @@ -0,0 +1,18 @@ +/* { dg-options "-O2 -ftree-vectorize" } */ + +#pragma GCC target "+nosve" + +#include <stdint.h> + +void +f (int64_t *x, int64_t *y, int32_t *z, int n) +{ + for (int i = 0; i < n; ++i) + { + x[i] = z[i]; + y[i] += y[i - 2]; + } +} + +/* { dg-final { scan-assembler-times {\tsxtl\tv[0-9]+\.2d, v[0-9]+\.2s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.2d,} 1 } } */ Index: gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_6.c =================================================================== --- /dev/null 2019-09-17 11:41:18.176664108 +0100 +++ gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_6.c 2019-10-25 13:27:36.193616936 +0100 @@ -0,0 +1,18 @@ +/* { dg-options "-O2 -ftree-vectorize" } */ + +#pragma GCC target "+nosve" + +#include <stdint.h> + +void +f (int32_t *x, int32_t *y, int16_t *z, int n) +{ + for (int i = 0; i < n; ++i) + { + x[i] = z[i]; + y[i] += y[i - 4]; + } +} + +/* { dg-final { scan-assembler-times {\tsxtl\tv[0-9]+\.4s, v[0-9]+\.4h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.4s,} 1 } } */ Index: gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_7.c =================================================================== --- /dev/null 2019-09-17 11:41:18.176664108 +0100 +++ gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_7.c 2019-10-25 13:27:36.193616936 +0100 @@ -0,0 +1,18 @@ +/* { dg-options "-O2 -ftree-vectorize" } */ + +#pragma GCC target "+nosve" + +#include <stdint.h> + +void +f (int16_t *x, int16_t *y, int8_t *z, int n) +{ + for (int i = 0; i < n; ++i) + { + x[i] = z[i]; + y[i] += y[i - 8]; + } +} + +/* { dg-final { scan-assembler-times {\tsxtl\tv[0-9]+\.8h, v[0-9]+\.8b\n} 1 } } */ +/* { 
dg-final { scan-assembler-times {\tadd\tv[0-9]+\.8h,} 1 } } */ Index: gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_8.c =================================================================== --- /dev/null 2019-09-17 11:41:18.176664108 +0100 +++ gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_8.c 2019-10-25 13:27:36.193616936 +0100 @@ -0,0 +1,18 @@ +/* { dg-options "-O2 -ftree-vectorize" } */ + +#pragma GCC target "+nosve" + +#include <stdint.h> + +void +f (int64_t *x, int64_t *y, uint32_t *z, int n) +{ + for (int i = 0; i < n; ++i) + { + x[i] = z[i]; + y[i] += y[i - 2]; + } +} + +/* { dg-final { scan-assembler-times {\tuxtl\tv[0-9]+\.2d, v[0-9]+\.2s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.2d,} 1 } } */ Index: gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_9.c =================================================================== --- /dev/null 2019-09-17 11:41:18.176664108 +0100 +++ gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_9.c 2019-10-25 13:27:36.193616936 +0100 @@ -0,0 +1,18 @@ +/* { dg-options "-O2 -ftree-vectorize" } */ + +#pragma GCC target "+nosve" + +#include <stdint.h> + +void +f (int32_t *x, int32_t *y, uint16_t *z, int n) +{ + for (int i = 0; i < n; ++i) + { + x[i] = z[i]; + y[i] += y[i - 4]; + } +} + +/* { dg-final { scan-assembler-times {\tuxtl\tv[0-9]+\.4s, v[0-9]+\.4h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.4s,} 1 } } */ Index: gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_10.c =================================================================== --- /dev/null 2019-09-17 11:41:18.176664108 +0100 +++ gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_10.c 2019-10-25 13:27:36.193616936 +0100 @@ -0,0 +1,18 @@ +/* { dg-options "-O2 -ftree-vectorize" } */ + +#pragma GCC target "+nosve" + +#include <stdint.h> + +void +f (int16_t *x, int16_t *y, uint8_t *z, int n) +{ + for (int i = 0; i < n; ++i) + { + x[i] = z[i]; + y[i] += y[i - 8]; + } +} + +/* { dg-final { scan-assembler-times 
{\tuxtl\tv[0-9]+\.8h, v[0-9]+\.8b\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.8h,} 1 } } */ Index: gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_11.c =================================================================== --- /dev/null 2019-09-17 11:41:18.176664108 +0100 +++ gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_11.c 2019-10-25 13:27:36.193616936 +0100 @@ -0,0 +1,18 @@ +/* { dg-options "-O2 -ftree-vectorize" } */ + +#pragma GCC target "+nosve" + +#include <stdint.h> + +void +f (int32_t *x, int64_t *y, int64_t *z, int n) +{ + for (int i = 0; i < n; ++i) + { + x[i] = z[i]; + y[i] += y[i - 2]; + } +} + +/* { dg-final { scan-assembler-times {\txtn\tv[0-9]+\.2s, v[0-9]+\.2d\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.2d,} 1 } } */ Index: gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_12.c =================================================================== --- /dev/null 2019-09-17 11:41:18.176664108 +0100 +++ gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_12.c 2019-10-25 13:27:36.193616936 +0100 @@ -0,0 +1,18 @@ +/* { dg-options "-O2 -ftree-vectorize" } */ + +#pragma GCC target "+nosve" + +#include <stdint.h> + +void +f (int16_t *x, int32_t *y, int32_t *z, int n) +{ + for (int i = 0; i < n; ++i) + { + x[i] = z[i]; + y[i] += y[i - 4]; + } +} + +/* { dg-final { scan-assembler-times {\txtn\tv[0-9]+\.4h, v[0-9]+\.4s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.4s,} 1 } } */ Index: gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_13.c =================================================================== --- /dev/null 2019-09-17 11:41:18.176664108 +0100 +++ gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_13.c 2019-10-25 13:27:36.193616936 +0100 @@ -0,0 +1,18 @@ +/* { dg-options "-O2 -ftree-vectorize" } */ + +#pragma GCC target "+nosve" + +#include <stdint.h> + +void +f (int8_t *x, int16_t *y, int16_t *z, int n) +{ + for (int i = 0; i < n; ++i) + { + x[i] = z[i]; + y[i] += y[i - 8]; + } +} + 
+/* { dg-final { scan-assembler-times {\txtn\tv[0-9]+\.8b, v[0-9]+\.8h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.8h,} 1 } } */