RE: [PATCH] aarch64: use ZIP1 instead of UZP1 for concatenation [PR125550]

Tamar Christina Tue, 09 Jun 2026 05:19:34 -0700

> -----Original Message-----
> From: Artemiy Volkov <[email protected]>
> Sent: 08 June 2026 13:44
> To: [email protected]
> Cc: Tamar Christina <[email protected]>; Wilco Dijkstra
> <[email protected]>; [email protected]; Richard
> Earnshaw <[email protected]>; [email protected]; Alice
> Carlotti <[email protected]>; Alex Coplan <[email protected]>;
> Artemiy Volkov <[email protected]>
> Subject: [PATCH] aarch64: use ZIP1 instead of UZP1 for concatenation
> [PR125550]
> 
> This patch addresses the issue in PR125550, where two float16 values are
> being concatenated using uzp1, i.e., this code:
> 
> svfloat16_t foo (float x0, float x1)
> {
>   return svdupq_n_f16 (x0, x1, x0, x1, x0, x1, x0, x1);
> }
> 
> is being compiled into:
> 
>       fcvt    h0, s0
>       fcvt    h1, s1
>       uzp1    v0.4h, v0.4h, v1.4h
>       mov     z0.s, s0
>       ret
> 
> causing the duplication of a 2-element vector (0, (float16) x0) into z0.
> 
> This is a copy-paste error from the original combine_internal patterns,
> where UZP1 always operates on vectors of 2 elements, in which circumstance
> it is equivalent to ZIP1.  For smaller element sizes (and thus higher
> element counts) only ZIP1 is correct.
> 
> The fix is to emit ZIP1 when concatenating values on vector registers.
> For consistency, I've changed the original combine_internal patterns as
> well as the ones added in r17-898-g920eeb67a3537b.  Since the change to
> the original patterns has nothing to do with the PR, it might have been
> better to split the patch in two; I'd be happy to do that if necessary.
> 
> Both aforementioned changes required adjusting existing AdvSIMD/SVE
> vec_init-related testcases; I've added pr125550.c from the PR on top of
> that as well.
> 
> Bootstrapped and regtested on aarch64-linux-gnu.
> 
>       PR target/125550
> 
> gcc/ChangeLog:
> 
>         * config/aarch64/aarch64-simd.md
>         (*aarch64_combine_internal<mode>): Use zip1 instead of uzp1
>       to concatenate values residing in SIMD registers.
>       (*aarch64_combine_internal_be<mode>): Likewise.
> 
> gcc/testsuite/ChangeLog:
> 
>       * gcc.target/aarch64/ldp_stp_16.c: Adjust testcases.
>       * gcc.target/aarch64/pr109072_1.c: Likewise.
>       * gcc.target/aarch64/simd/mf8_data_1.c: Likewise.
>       * gcc.target/aarch64/sve/vec_init_5.c: Likewise.
>       * gcc.target/aarch64/vec-init-14.c: Likewise.
>       * gcc.target/aarch64/vec-init-23.c: Likewise.
>       * gcc.target/aarch64/vec-init-9.c: Likewise.
>       * gcc.target/aarch64/sve/pr125550.c: New test.


OK.

Thanks,
Tamar
> ---
>  gcc/config/aarch64/aarch64-simd.md            |  8 +++---
>  gcc/testsuite/gcc.target/aarch64/ldp_stp_16.c | 10 +++----
>  gcc/testsuite/gcc.target/aarch64/pr109072_1.c |  4 +--
>  .../gcc.target/aarch64/simd/mf8_data_1.c      | 18 ++++++-------
>  .../gcc.target/aarch64/sve/pr125550.c         | 19 ++++++++++++++
>  .../gcc.target/aarch64/sve/vec_init_5.c       | 26 +++++++++----------
>  .../gcc.target/aarch64/vec-init-14.c          |  4 +--
>  .../gcc.target/aarch64/vec-init-23.c          | 26 +++++++++----------
>  gcc/testsuite/gcc.target/aarch64/vec-init-9.c | 12 ++++-----
>  9 files changed, 73 insertions(+), 54 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/pr125550.c
> 
> diff --git a/gcc/config/aarch64/aarch64-simd.md
> b/gcc/config/aarch64/aarch64-simd.md
> index 843ad6cb076..b2e8fe3f6a9 100644
> --- a/gcc/config/aarch64/aarch64-simd.md
> +++ b/gcc/config/aarch64/aarch64-simd.md
> @@ -4869,7 +4869,7 @@
>     && (register_operand (operands[0], <VDBL>mode)
>         || register_operand (operands[2], <MODE>mode))"
>    {@ [ cons: =0 , 1  , 2   ; attrs: type               , arch  ]
> -     [ w        , w  , w   ; neon_permute<dblq>        , simd  ] uzp1\t%0.2<single_type>, %1.2<single_type>, %2.2<single_type>
> +     [ w        , w  , w   ; neon_permute<dblq>        , simd  ] zip1\t%0.2<single_type>, %1.2<single_type>, %2.2<single_type>
>       [ w        , 0  , ?r  ; neon_from_gp<dblq>        , simd  ] ins\t%0.<single_type>[1], %<single_wx>2
>       [ w        , 0  , ?r  ; f_mcr                     , *     ] fmov\t%0.d[1], %2
>       [ w        , 0  , Utv ; neon_load1_one_lane<dblq> , simd  ] ld1\t{%0.<single_type>}[1], %2
> @@ -4886,7 +4886,7 @@
>    "TARGET_FLOAT
>     && !BYTES_BIG_ENDIAN"
>    {@ [ cons: =0 , 1  , 2   ; attrs: type               , arch  ]
> -     [ w        , w  , w   ; neon_permute              , simd  ] uzp1\t%0.<Vdduptype>, %1.<Vdduptype>, %2.<Vdduptype>
> +     [ w        , w  , w   ; neon_permute              , simd  ] zip1\t%0.<Vdduptype>, %1.<Vdduptype>, %2.<Vdduptype>
>       [ w        , 0  , w   ; neon_move                 , simd  ] mov\t%0.<single_type>[1], %2.<single_type>[0]
>       [ w        , 0  , Utv ; neon_load1_one_lane       , simd  ] ld1\t{%0.<single_type>}[1], %2
>       [ w        , 0  , r   ; neon_from_gp              , simd  ] ins\t%0.<single_type>[1], %<single_wx>2
> @@ -4916,7 +4916,7 @@
>     && (register_operand (operands[0], <VDBL>mode)
>         || register_operand (operands[2], <MODE>mode))"
>    {@ [ cons: =0 , 1  , 2   ; attrs: type               , arch  ]
> -     [ w        , w  , w   ; neon_permute<dblq>        , simd  ] uzp1\t%0.2<single_type>, %1.2<single_type>, %2.2<single_type>
> +     [ w        , w  , w   ; neon_permute<dblq>        , simd  ] zip1\t%0.2<single_type>, %1.2<single_type>, %2.2<single_type>
>       [ w        , 0  , ?r  ; neon_from_gp<dblq>        , simd  ] ins\t%0.<single_type>[1], %<single_wx>2
>       [ w        , 0  , ?r  ; f_mcr                     , *     ] fmov\t%0.d[1], %2
>       [ w        , 0  , Utv ; neon_load1_one_lane<dblq> , simd  ] ld1\t{%0.<single_type>}[1], %2
> @@ -4933,7 +4933,7 @@
>    "TARGET_FLOAT
>     && BYTES_BIG_ENDIAN"
>    {@ [ cons: =0 , 1  , 2   ; attrs: type               , arch  ]
> -     [ w        , w  , w   ; neon_permute              , simd  ] uzp1\t%0.<Vdduptype>, %1.<Vdduptype>, %2.<Vdduptype>
> +     [ w        , w  , w   ; neon_permute              , simd  ] zip1\t%0.<Vdduptype>, %1.<Vdduptype>, %2.<Vdduptype>
>       [ w        , 0  , w   ; neon_move                 , simd  ] mov\t%0.<single_type>[1], %2.<single_type>[0]
>       [ w        , 0  , Utv ; neon_load1_one_lane       , simd  ] ld1\t{%0.<single_type>}[1], %2
>       [ w        , 0  , r   ; neon_from_gp              , simd  ] ins\t%0.<single_type>[1], %<single_wx>2
> diff --git a/gcc/testsuite/gcc.target/aarch64/ldp_stp_16.c
> b/gcc/testsuite/gcc.target/aarch64/ldp_stp_16.c
> index a6b4d50f34f..e8c975e900f 100644
> --- a/gcc/testsuite/gcc.target/aarch64/ldp_stp_16.c
> +++ b/gcc/testsuite/gcc.target/aarch64/ldp_stp_16.c
> @@ -80,14 +80,14 @@ CONS2_FN (2, float);
> 
>  /*
>  ** cons2_4_float:    { target aarch64_little_endian }
> -**   uzp1    v([0-9])\.2s, v0\.2s, v1\.2s
> +**   zip1    v([0-9])\.2s, v0\.2s, v1\.2s
>  **   stp     d\1, d\1, \[x0\]
>  **   stp     d\1, d\1, \[x0, #?16\]
>  **   ret
>  */
>  /*
>  ** cons2_4_float:    { target aarch64_big_endian }
> -**   uzp1    v([0-9])\.2s, v1\.2s, v0\.2s
> +**   zip1    v([0-9])\.2s, v1\.2s, v0\.2s
>  **   stp     d\1, d\1, \[x0\]
>  **   stp     d\1, d\1, \[x0, #?16\]
>  **   ret
> @@ -96,7 +96,7 @@ CONS2_FN (4, float);
> 
>  /*
>  ** cons2_8_float:
> -**   uzp1    v1\.2s, v0\.2s, v1\.2s
> +**   zip1    v1\.2s, v0\.2s, v1\.2s
>  **   dup     v([0-9]+)\.2d, v1\.d\[0\]
>  **   stp     q\1, q\1, \[x0\]
>  **   stp     q\1, q\1, \[x0, #?32\]
> @@ -124,8 +124,8 @@ CONS4_FN (2, float);
> 
>  /*
>  ** cons4_4_float:
> -**   uzp1    v[0-9]+\.2s[^\n]+
> -**   uzp1    v[0-9]+\.2s[^\n]+
> +**   zip1    v[0-9]+\.2s[^\n]+
> +**   zip1    v[0-9]+\.2s[^\n]+
>  **   zip1    v([0-9]+).4s, [^\n]+
>  **   stp     q\1, q\1, \[x0\]
>  **   stp     q\1, q\1, \[x0, #?32\]
> diff --git a/gcc/testsuite/gcc.target/aarch64/pr109072_1.c
> b/gcc/testsuite/gcc.target/aarch64/pr109072_1.c
> index 39d80222142..daaccf0b881 100644
> --- a/gcc/testsuite/gcc.target/aarch64/pr109072_1.c
> +++ b/gcc/testsuite/gcc.target/aarch64/pr109072_1.c
> @@ -54,7 +54,7 @@ f32x2_1 (float32_t x)
> 
>  /*
>  ** f32x2_2:
> -**   uzp1    v0\.2s, v0\.2s, v1\.2s
> +**   zip1    v0\.2s, v0\.2s, v1\.2s
>  **   ret
>  */
>  float32x2_t
> @@ -166,7 +166,7 @@ f64x2_1 (float64_t x)
> 
>  /*
>  ** f64x2_2:
> -**   uzp1    v0\.2d, v0\.2d, v1\.2d
> +**   zip1    v0\.2d, v0\.2d, v1\.2d
>  **   ret
>  */
>  float64x2_t
> diff --git a/gcc/testsuite/gcc.target/aarch64/simd/mf8_data_1.c
> b/gcc/testsuite/gcc.target/aarch64/simd/mf8_data_1.c
> index 79d1ccf6f7d..e440b899f59 100644
> --- a/gcc/testsuite/gcc.target/aarch64/simd/mf8_data_1.c
> +++ b/gcc/testsuite/gcc.target/aarch64/simd/mf8_data_1.c
> @@ -66,7 +66,7 @@ mfloat8x16_t test_bslq3(mfloat8x16_t a, uint8x16_t b,
> mfloat8x16_t c)
> 
>  /*
>  ** test_combine1:
> -**   uzp1    v0.2d, v1.2d, v2.2d
> +**   zip1    v0.2d, v1.2d, v2.2d
>  **   ret
>  */
>  mfloat8x16_t test_combine1(mfloat8_t a, mfloat8x8_t b, mfloat8x8_t c)
> @@ -1397,7 +1397,7 @@ mfloat8x8_t test_tbl1(mfloat8x8_t a, uint8x8_t b)
> 
>  /*
>  ** test_tbl2:
> -**   uzp1    v([0-9]+).2d, v0.2d, v1.2d
> +**   zip1    v([0-9]+).2d, v0.2d, v1.2d
>  **   tbl     v0.8b, {v\1.16b}, v2.8b
>  **   ret
>  */
> @@ -1408,7 +1408,7 @@ mfloat8x8_t test_tbl2(mfloat8x8x2_t a, uint8x8_t
> b)
> 
>  /*
>  ** test_tbl3:
> -**   uzp1    v([0-9]+).2d, v0.2d, v1.2d
> +**   zip1    v([0-9]+).2d, v0.2d, v1.2d
>  **   fmov    d([0-9]+), d2
>  **   tbl     v0.8b, {v\1.16b( - |, )v\2.16b}, v3.8b
>  **   ret
> @@ -1420,8 +1420,8 @@ mfloat8x8_t test_tbl3(mfloat8x8x3_t a, uint8x8_t
> b)
> 
>  /*
>  ** test_tbl4:
> -**   uzp1    v([0-9]+).2d, v0.2d, v1.2d
> -**   uzp1    v([0-9]+).2d, v2.2d, v3.2d
> +**   zip1    v([0-9]+).2d, v0.2d, v1.2d
> +**   zip1    v([0-9]+).2d, v2.2d, v3.2d
>  **   tbl     v0.8b, {v\1.16b( - |, )v\2.16b}, v4.8b
>  **   ret
>  */
> @@ -1526,7 +1526,7 @@ mfloat8x8_t test_tbx1(mfloat8x8_t a, mfloat8x8_t
> b, uint8x8_t c)
> 
>  /*
>  ** test_tbx2:
> -**   uzp1    v([0-9]+).2d, v1.2d, v2.2d
> +**   zip1    v([0-9]+).2d, v1.2d, v2.2d
>  **   tbx     v[0-9]+.8b, {v\1.16b}, v3.8b
>  **   ret
>  */
> @@ -1537,7 +1537,7 @@ mfloat8x8_t test_tbx2(mfloat8x8_t a,
> mfloat8x8x2_t b, uint8x8_t c)
> 
>  /*
>  ** test_tbx3:
> -**   uzp1    v([0-9]+).2d, v1.2d, v2.2d
> +**   zip1    v([0-9]+).2d, v1.2d, v2.2d
>  **   fmov    d([0-9]+), d3
>  **   tbl     v[0-9]+.8b, {v\1.16b( - |, )v\2.16b}, v4.8b
>  **   ...
> @@ -1552,8 +1552,8 @@ mfloat8x8_t test_tbx3(mfloat8x8_t a,
> mfloat8x8x3_t b, uint8x8_t c)
> 
>  /*
>  ** test_tbx4:
> -**   uzp1    v([0-9]+).2d, v1.2d, v2.2d
> -**   uzp1    v([0-9]+).2d, v3.2d, v4.2d
> +**   zip1    v([0-9]+).2d, v1.2d, v2.2d
> +**   zip1    v([0-9]+).2d, v3.2d, v4.2d
>  **   tbx     v0.8b, {v\1.16b( - |, )v\2.16b}, v5.8b
>  **   ret
>  */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr125550.c
> b/gcc/testsuite/gcc.target/aarch64/sve/pr125550.c
> new file mode 100644
> index 00000000000..89186dc07c9
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/pr125550.c
> @@ -0,0 +1,19 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -march=armv9.5-a" } */
> +/* { dg-final { check-function-bodies "**" "" "" } } */
> +
> +#include <arm_sve.h>
> +
> +svfloat16_t foo (float x0, float x1)
> +{
> +  return svdupq_n_f16 (x0, x1, x0, x1, x0, x1, x0, x1);
> +}
> +
> +/*
> +** foo:
> +**   fcvt    h([01]), s\1
> +**   fcvt    h([01]), s\2
> +**   zip1    v0\.4h, v0\.4h, v1\.4h
> +**   mov     z0\.s, s0
> +**   ret
> +*/
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vec_init_5.c
> b/gcc/testsuite/gcc.target/aarch64/sve/vec_init_5.c
> index 2bc9a3aeba5..0dd085a9423 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve/vec_init_5.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/vec_init_5.c
> @@ -183,15 +183,15 @@
>  ** test_float16_2:
>  **   fcvt    h1, s1
>  **   fcvt    h0, s0
> -**   uzp1    v0\.4h, v0\.4h, v1\.4h
> +**   zip1    v0\.4h, v0\.4h, v1\.4h
>  **   mov     z0\.s, s0
>  **   ret
>  */
> 
>  /*
>  ** test_float16_3:
> -**   uzp1    v2\.2s, v0\.2s, v2\.2s
> -**   uzp1    v3\.2s, v1\.2s, v3\.2s
> +**   zip1    v2\.2s, v0\.2s, v2\.2s
> +**   zip1    v3\.2s, v1\.2s, v3\.2s
>  **   zip1    v3\.4s, v2\.4s, v3\.4s
>  **   fcvtn   v3\.4h, v3\.4s
>  **   mov     z0\.d, d3
> @@ -210,7 +210,7 @@
>  ** test_float16_5:
>  **   movi    v31\.4h, #0
>  **   fcvt    h0, s0
> -**   uzp1    v0\.4h, v31\.4h, v0\.4h
> +**   zip1    v0\.4h, v31\.4h, v0\.4h
>  **   mov     z0\.s, s0
>  **   ret
>  */
> @@ -221,7 +221,7 @@
>  **   fcvt    h1, s1
>  **   fmov    h31, 1.0e\+0
>  **   fmov    h2, h2
> -**   uzp1    v1\.4h, v1\.4h, v31\.4h
> +**   zip1    v1\.4h, v1\.4h, v31\.4h
>  **   dup     v0\.2s, v2\.s\[0\]
>  **   dup     v1\.2s, v1\.s\[0\]
>  **   zip1    v0\.8h, v0\.8h, v1\.8h
> @@ -235,8 +235,8 @@
>  **   fcvt    h2, s1
>  **   movi    v0\.4h, #0
>  **   fmov    h1, 1.0e\+0
> -**   uzp1    v1\.4h, v1\.4h, v2\.4h
> -**   uzp1    v0\.4h, v0\.4h, v3\.4h
> +**   zip1    v1\.4h, v1\.4h, v2\.4h
> +**   zip1    v0\.4h, v0\.4h, v3\.4h
>  **   dup     v1\.2s, v1\.s\[0\]
>  **   dup     v0\.2s, v0\.s\[0\]
>  **   zip1    v0\.8h, v0\.8h, v1\.8h
> @@ -249,7 +249,7 @@
>  **   fcvt    h1, s1
>  **   fcvt    h0, s0
>  **   movi    v31\.2s, 0x3c, lsl 24
> -**   uzp1    v0\.4h, v0\.4h, v1.4h
> +**   zip1    v0\.4h, v0\.4h, v1.4h
>  **   dup     v0\.2s, v0\.s\[0\]
>  **   zip1    v0\.8h, v31\.8h, v0\.8h
>  **   dup     z0\.q, z0\.q\[0\]
> @@ -261,8 +261,8 @@
>  **   fcvt    h1, s1
>  **   fcvt    h2, s2
>  **   fcvt    h0, s0
> -**   uzp1    v0\.4h, v0\.4h, v1\.4h
> -**   uzp1    v1\.4h, v1\.4h, v2\.4h
> +**   zip1    v0\.4h, v0\.4h, v1\.4h
> +**   zip1    v1\.4h, v1\.4h, v2\.4h
>  **   dup     v0\.2s, v0\.s\[0\]
>  **   dup     v1\.2s, v1\.s\[0\]
>  **   zip1    v0\.8h, v0\.8h, v1\.8h
> @@ -275,7 +275,7 @@
>  **   fcvt    h2, s2
>  **   fcvt    h0, s0
>  **   fcvt    h1, s1
> -**   uzp1    v0\.4h, v0\.4h, v2\.4h
> +**   zip1    v0\.4h, v0\.4h, v2\.4h
>  **   dup     v1\.4h, v1\.h\[0\]
>  **   dup     v0\.2s, v0\.s\[0\]
>  **   zip1    v0\.8h, v0\.8h, v1\.8h
> @@ -386,7 +386,7 @@
> 
>  /*
>  ** test_float32_2:
> -**   uzp1    v0\.2s, v0\.2s, v1\.2s
> +**   zip1    v0\.2s, v0\.2s, v1\.2s
>  **   mov     z0\.d, d0
>  **   ret
>  */
> @@ -401,7 +401,7 @@
>  /*
>  ** test_float32_4:
>  **   movi    v31\.2s, #0
> -**   uzp1    v0\.2s, v31\.2s, v0\.2s
> +**   zip1    v0\.2s, v31\.2s, v0\.2s
>  **   mov     z0\.d, d0
>  **   ret
>  */
> diff --git a/gcc/testsuite/gcc.target/aarch64/vec-init-14.c
> b/gcc/testsuite/gcc.target/aarch64/vec-init-14.c
> index 1a2cc9fbf47..ea719f32e4f 100644
> --- a/gcc/testsuite/gcc.target/aarch64/vec-init-14.c
> +++ b/gcc/testsuite/gcc.target/aarch64/vec-init-14.c
> @@ -67,7 +67,7 @@ int32x2_t s32_6(int32_t a0, int32_t a1) {
> 
>  /*
>  ** f32_1:
> -**   uzp1    v0\.2s, v0\.2s, v1\.2s
> +**   zip1    v0\.2s, v0\.2s, v1\.2s
>  **   ret
>  */
>  float32x2_t f32_1(float32_t a0, float32_t a1) {
> @@ -90,7 +90,7 @@ float32x2_t f32_2(float32_t a0, float32_t *ptr) {
>  /*
>  ** f32_3:
>  **   ldr     s0, \[x0\]
> -**   uzp1    v0\.2s, v0\.2s, v1\.2s
> +**   zip1    v0\.2s, v0\.2s, v1\.2s
>  **   ret
>  */
>  float32x2_t f32_3(float32_t a0, float32_t a1, float32_t *ptr) {
> diff --git a/gcc/testsuite/gcc.target/aarch64/vec-init-23.c
> b/gcc/testsuite/gcc.target/aarch64/vec-init-23.c
> index 2a209509d1b..9374da8a84d 100644
> --- a/gcc/testsuite/gcc.target/aarch64/vec-init-23.c
> +++ b/gcc/testsuite/gcc.target/aarch64/vec-init-23.c
> @@ -242,15 +242,15 @@ TEST_64(int, int64_t, s)
>  ** test_float16_2:
>  **   fcvt    h1, s1
>  **   fcvt    h0, s0
> -**   uzp1    v0\.4h, v0\.4h, v1\.4h
> +**   zip1    v0\.4h, v0\.4h, v1\.4h
>  **   dup     v0\.4s, v0\.s\[0\]
>  **   ret
>  */
> 
>  /*
>  ** test_float16_3:
> -**   uzp1    v2\.2s, v0\.2s, v2\.2s
> -**   uzp1    v3\.2s, v1\.2s, v3\.2s
> +**   zip1    v2\.2s, v0\.2s, v2\.2s
> +**   zip1    v3\.2s, v1\.2s, v3\.2s
>  **   zip1    v3\.4s, v2\.4s, v3\.4s
>  **   fcvtn   v3\.4h, v3\.4s
>  **   dup     v0\.2d, v3\.d\[0\]
> @@ -269,7 +269,7 @@ TEST_64(int, int64_t, s)
>  ** test_float16_5:
>  **   movi    v31\.4h, #0
>  **   fcvt    h0, s0
> -**   uzp1    v0\.4h, v31\.4h, v0\.4h
> +**   zip1    v0\.4h, v31\.4h, v0\.4h
>  **   dup     v0\.4s, v0\.s\[0\]
>  **   ret
>  */
> @@ -280,7 +280,7 @@ TEST_64(int, int64_t, s)
>  **   fcvt    h1, s1
>  **   fmov    h31, 1.0e\+0
>  **   fmov    h0, h0
> -**   uzp1    v1\.4h, v1\.4h, v31\.4h
> +**   zip1    v1\.4h, v1\.4h, v31\.4h
>  **   dup     v0\.2s, v0\.s\[0\]
>  **   dup     v1\.2s, v1\.s\[0\]
>  **   zip1    v0\.8h, v0\.8h, v1\.8h
> @@ -292,9 +292,9 @@ TEST_64(int, int64_t, s)
>  **   fcvt    h0, s0
>  **   movi    v31\.4h, #0
>  **   fcvt    h1, s1
> -**   uzp1    v31\.4h, v31\.4h, v0\.4h
> +**   zip1    v31\.4h, v31\.4h, v0\.4h
>  **   fmov    h0, 1.0e\+0
> -**   uzp1    v0\.4h, v0\.4h, v1\.4h
> +**   zip1    v0\.4h, v0\.4h, v1\.4h
>  **   dup     v31\.2s, v31\.s\[0\]
>  **   dup     v0\.2s, v0\.s\[0\]
>  **   zip1    v0\.8h, v31\.8h, v0\.8h
> @@ -306,7 +306,7 @@ TEST_64(int, int64_t, s)
>  **   fcvt    h1, s1
>  **   fcvt    h0, s0
>  **   movi    v31\.2s, 0x3c, lsl 24
> -**   uzp1    v0\.4h, v0\.4h, v1\.4h
> +**   zip1    v0\.4h, v0\.4h, v1\.4h
>  **   dup     v0\.2s, v0\.s\[0\]
>  **   zip1    v0\.8h, v31\.8h, v0\.8h
>  **   ret
> @@ -317,8 +317,8 @@ TEST_64(int, int64_t, s)
>  **   fcvt    h1, s1
>  **   fcvt    h2, s2
>  **   fcvt    h0, s0
> -**   uzp1    v0\.4h, v0\.4h, v1\.4h
> -**   uzp1    v1\.4h, v1\.4h, v2\.4h
> +**   zip1    v0\.4h, v0\.4h, v1\.4h
> +**   zip1    v1\.4h, v1\.4h, v2\.4h
>  **   dup     v0\.2s, v0\.s\[0\]
>  **   dup     v1\.2s, v1\.s\[0\]
>  **   zip1    v0\.8h, v0\.8h, v1\.8h
> @@ -330,7 +330,7 @@ TEST_64(int, int64_t, s)
>  **   fcvt    h2, s2
>  **   fcvt    h0, s0
>  **   fcvt    h1, s1
> -**   uzp1    v0\.4h, v0\.4h, v2\.4h
> +**   zip1    v0\.4h, v0\.4h, v2\.4h
>  **   dup     v1\.4h, v1\.h\[0\]
>  **   dup     v0\.2s, v0\.s\[0\]
>  **   zip1    v0\.8h, v0\.8h, v1\.8h
> @@ -434,7 +434,7 @@ TEST_64(int, int64_t, s)
> 
>  /*
>  ** test_float32_2:
> -**   uzp1    v0\.2s, v0\.2s, v1\.2s
> +**   zip1    v0\.2s, v0\.2s, v1\.2s
>  **   dup     v0\.2d, v0\.d\[0\]
>  **   ret
>  */
> @@ -449,7 +449,7 @@ TEST_64(int, int64_t, s)
>  /*
>  ** test_float32_4:
>  **   movi    v31\.2s, #0
> -**   uzp1    v0\.2s, v31\.2s, v0\.2s
> +**   zip1    v0\.2s, v31\.2s, v0\.2s
>  **   dup     v0\.2d, v0\.d\[0\]
>  **   ret
>  */
> diff --git a/gcc/testsuite/gcc.target/aarch64/vec-init-9.c
> b/gcc/testsuite/gcc.target/aarch64/vec-init-9.c
> index 3cf05cf865e..8fccf278d31 100644
> --- a/gcc/testsuite/gcc.target/aarch64/vec-init-9.c
> +++ b/gcc/testsuite/gcc.target/aarch64/vec-init-9.c
> @@ -75,7 +75,7 @@ int64x2_t s64q_6(int64_t a0, int64_t a1) {
> 
>  /*
>  ** f64q_1:
> -**   uzp1    v0\.2d, v0\.2d, v1\.2d
> +**   zip1    v0\.2d, v0\.2d, v1\.2d
>  **   ret
>  */
>  float64x2_t f64q_1(float64_t a0, float64_t a1) {
> @@ -98,7 +98,7 @@ float64x2_t f64q_2(float64_t a0, float64_t *ptr) {
>  /*
>  ** f64q_3:
>  **   ldr     d0, \[x0\]
> -**   uzp1    v0\.2d, v0\.2d, v1\.2d
> +**   zip1    v0\.2d, v0\.2d, v1\.2d
>  **   ret
>  */
>  float64x2_t f64q_3(float64_t a0, float64_t a1, float64_t *ptr) {
> @@ -140,7 +140,7 @@ float64x2_t f64q_6(float64_t a0, float64_t a1) {
> 
>  /*
>  ** s32q_1:
> -**   uzp1    v0\.2d, v0\.2d, v1\.2d
> +**   zip1    v0\.2d, v0\.2d, v1\.2d
>  **   ret
>  */
>  int32x4_t s32q_1(int32x2_t a0, int32x2_t a1) {
> @@ -157,7 +157,7 @@ int32x4_t s32q_2(int32x2_t a0, int32x2_t *ptr) {
>  /*
>  ** s32q_3:
>  **   ldr     d0, \[x0\]
> -**   uzp1    v0\.2d, v0\.2d, v1\.2d
> +**   zip1    v0\.2d, v0\.2d, v1\.2d
>  **   ret
>  */
>  int32x4_t s32q_3(int32x2_t a0, int32x2_t a1, int32x2_t *ptr) {
> @@ -204,7 +204,7 @@ int32x4_t s32q_6(int32x2_t a0, int32x2_t a1) {
> 
>  /*
>  ** f32q_1:
> -**   uzp1    v0\.2d, v0\.2d, v1\.2d
> +**   zip1    v0\.2d, v0\.2d, v1\.2d
>  **   ret
>  */
>  float32x4_t f32q_1(float32x2_t a0, float32x2_t a1) {
> @@ -221,7 +221,7 @@ float32x4_t f32q_2(float32x2_t a0, float32x2_t *ptr)
> {
>  /*
>  ** f32q_3:
>  **   ldr     d0, \[x0\]
> -**   uzp1    v0\.2d, v0\.2d, v1\.2d
> +**   zip1    v0\.2d, v0\.2d, v1\.2d
>  **   ret
>  */
>  float32x4_t f32q_3(float32x2_t a0, float32x2_t a1, float32x2_t *ptr) {
> --
> 2.43.0

RE: [PATCH] aarch64: use ZIP1 instead of UZP1 for concatenation [PR125550]

Reply via email to