Hi All, It looks like I forgot to apply this patch to the patch stack during my pre-commit test run. It had a typo in the element size.
It also looks like, since the hi/lo operations take different element counts in the assembler syntax, I can't have a unified pattern. This splits each pattern into two :( Bootstrapped and regtested on aarch64-none-linux-gnu with no issues. Sorry for the breakage. Ok for master? Thanks, Tamar gcc/ChangeLog: * config/aarch64/aarch64-simd.md (aarch64_uaddw<mode>_<PERM_EXTEND:perm_hilo>_zip, aarch64_usubw<mode>_<PERM_EXTEND:perm_hilo>_zip): Split into... (aarch64_uaddw<mode>_lo_zip, aarch64_uaddw<mode>_hi_zip, aarch64_usubw<mode>_lo_zip, aarch64_usubw<mode>_hi_zip): ... This. gcc/testsuite/ChangeLog: * gcc.target/aarch64/uxtl-combine-4.c: Fix typo. * gcc.target/aarch64/uxtl-combine-5.c: Likewise. * gcc.target/aarch64/uxtl-combine-6.c: Likewise. --- inline copy of patch -- diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 75ee659871080ed28b9887990b7431682c283502..80e338bb8952140dd8be178cc8aed0c47b81c775 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -4810,7 +4810,7 @@ (define_insn "aarch64_<ANY_EXTEND:su>subw2<mode>_internal" [(set_attr "type" "neon_sub_widen")] ) -(define_insn "aarch64_usubw<mode>_<PERM_EXTEND:perm_hilo>_zip" +(define_insn "aarch64_usubw<mode>_lo_zip" [(set (match_operand:<VWIDE> 0 "register_operand" "=w") (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w") @@ -4818,23 +4818,51 @@ (define_insn "aarch64_usubw<mode>_<PERM_EXTEND:perm_hilo>_zip" (unspec:<MODE> [ (match_operand:VQW 2 "register_operand" "w") (match_operand:VQW 3 "aarch64_simd_imm_zero") - ] PERM_EXTEND) 0)))] + ] UNSPEC_ZIP1) 0)))] "TARGET_SIMD" - "usubw<PERM_EXTEND:perm_index>\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>" + "usubw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>" [(set_attr "type" "neon_sub_widen")] ) -(define_insn "aarch64_uaddw<mode>_<PERM_EXTEND:perm_hilo>_zip" +(define_insn "aarch64_uaddw<mode>_lo_zip" [(set (match_operand:<VWIDE> 0 "register_operand" "=w") (plus:<VWIDE> 
(subreg:<VWIDE> (unspec:<MODE> [ (match_operand:VQW 2 "register_operand" "w") (match_operand:VQW 3 "aarch64_simd_imm_zero") - ] PERM_EXTEND) 0) + ] UNSPEC_ZIP1) 0) (match_operand:<VWIDE> 1 "register_operand" "w")))] "TARGET_SIMD" - "uaddw<PERM_EXTEND:perm_index>\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>" + "uaddw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>" + [(set_attr "type" "neon_add_widen")] +) + +(define_insn "aarch64_usubw<mode>_hi_zip" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (minus:<VWIDE> + (match_operand:<VWIDE> 1 "register_operand" "w") + (subreg:<VWIDE> + (unspec:<MODE> [ + (match_operand:VQW 2 "register_operand" "w") + (match_operand:VQW 3 "aarch64_simd_imm_zero") + ] UNSPEC_ZIP2) 0)))] + "TARGET_SIMD" + "usubw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>" + [(set_attr "type" "neon_sub_widen")] +) + +(define_insn "aarch64_uaddw<mode>_hi_zip" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (plus:<VWIDE> + (subreg:<VWIDE> + (unspec:<MODE> [ + (match_operand:VQW 2 "register_operand" "w") + (match_operand:VQW 3 "aarch64_simd_imm_zero") + ] UNSPEC_ZIP2) 0) + (match_operand:<VWIDE> 1 "register_operand" "w")))] + "TARGET_SIMD" + "uaddw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>" [(set_attr "type" "neon_add_widen")] ) diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 2354315d7d249ccee46625d13b32678f1da1f087..a920de99ffca378ce518f378a35cbe2766877ee8 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -2645,9 +2645,6 @@ (define_int_iterator PERMUTEQ [UNSPEC_ZIP1Q UNSPEC_ZIP2Q (define_int_iterator OPTAB_PERMUTE [UNSPEC_ZIP1 UNSPEC_ZIP2 UNSPEC_UZP1 UNSPEC_UZP2]) -;; Permutes for zero extends -(define_int_iterator PERM_EXTEND [UNSPEC_ZIP1 UNSPEC_ZIP2]) - (define_int_iterator REVERSE [UNSPEC_REV64 UNSPEC_REV32 UNSPEC_REV16]) (define_int_iterator FRINT [UNSPEC_FRINTZ UNSPEC_FRINTP UNSPEC_FRINTM @@ -3470,10 +3467,7 @@ (define_int_attr rev_op [(UNSPEC_REV64 "64") (UNSPEC_REV32 
"32") (UNSPEC_REV16 "16")]) (define_int_attr perm_hilo [(UNSPEC_UNPACKSHI "hi") (UNSPEC_UNPACKUHI "hi") - (UNSPEC_UNPACKSLO "lo") (UNSPEC_UNPACKULO "lo") - (UNSPEC_ZIP2 "hi") (UNSPEC_ZIP1 "lo")]) - -(define_int_attr perm_index [(UNSPEC_ZIP2 "2") (UNSPEC_ZIP1 "")]) + (UNSPEC_UNPACKSLO "lo") (UNSPEC_UNPACKULO "lo")]) ;; Return true if the associated optab refers to the high-numbered lanes, ;; false if it refers to the low-numbered lanes. The convention is for diff --git a/gcc/testsuite/gcc.target/aarch64/uxtl-combine-4.c b/gcc/testsuite/gcc.target/aarch64/uxtl-combine-4.c index e1a9c4f5661a36ec7b2c5dc6f0fd85c42fcaac39..67944f70ecceff7ed833de86b76606547f3db76c 100755 --- a/gcc/testsuite/gcc.target/aarch64/uxtl-combine-4.c +++ b/gcc/testsuite/gcc.target/aarch64/uxtl-combine-4.c @@ -16,5 +16,5 @@ void d2 (SIGN TYPE2 * restrict a, SIGN TYPE1 *b, int n) /* { dg-final { scan-assembler-not {\tzip1\t} } } */ /* { dg-final { scan-assembler-not {\tzip2\t} } } */ /* { dg-final { scan-assembler-times {\tsxtl\t} 1 } } */ -/* { dg-final { scan-assembler-time {\tsxtl2\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tsxtl2\t} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/uxtl-combine-5.c b/gcc/testsuite/gcc.target/aarch64/uxtl-combine-5.c index 92b09ba4abba80f240ac175be2ef880968534975..e691c4f0b595d1a60b445415970c5a67d7dd0419 100755 --- a/gcc/testsuite/gcc.target/aarch64/uxtl-combine-5.c +++ b/gcc/testsuite/gcc.target/aarch64/uxtl-combine-5.c @@ -16,5 +16,5 @@ void d2 (SIGN TYPE2 * restrict a, SIGN TYPE1 *b, int n) /* { dg-final { scan-assembler-not {\tzip1\t} } } */ /* { dg-final { scan-assembler-not {\tzip2\t} } } */ /* { dg-final { scan-assembler-times {\tsxtl\t} 1 } } */ -/* { dg-final { scan-assembler-time {\tsxtl2\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tsxtl2\t} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/uxtl-combine-6.c b/gcc/testsuite/gcc.target/aarch64/uxtl-combine-6.c index 
5c6e635f29d1e52f51f5b75a477f7d8744f32ca3..9383f7ebf9355ff471c48549fc0e4c07706601f1 100755 --- a/gcc/testsuite/gcc.target/aarch64/uxtl-combine-6.c +++ b/gcc/testsuite/gcc.target/aarch64/uxtl-combine-6.c @@ -16,5 +16,5 @@ void d2 (SIGN TYPE2 * restrict a, SIGN TYPE1 *b, int n) /* { dg-final { scan-assembler-not {\tzip1\t} } } */ /* { dg-final { scan-assembler-not {\tzip2\t} } } */ /* { dg-final { scan-assembler-times {\tsxtl\t} 1 } } */ -/* { dg-final { scan-assembler-time {\tsxtl2\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tsxtl2\t} 1 } } */ --
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 75ee659871080ed28b9887990b7431682c283502..80e338bb8952140dd8be178cc8aed0c47b81c775 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -4810,7 +4810,7 @@ (define_insn "aarch64_<ANY_EXTEND:su>subw2<mode>_internal" [(set_attr "type" "neon_sub_widen")] ) -(define_insn "aarch64_usubw<mode>_<PERM_EXTEND:perm_hilo>_zip" +(define_insn "aarch64_usubw<mode>_lo_zip" [(set (match_operand:<VWIDE> 0 "register_operand" "=w") (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w") @@ -4818,23 +4818,51 @@ (define_insn "aarch64_usubw<mode>_<PERM_EXTEND:perm_hilo>_zip" (unspec:<MODE> [ (match_operand:VQW 2 "register_operand" "w") (match_operand:VQW 3 "aarch64_simd_imm_zero") - ] PERM_EXTEND) 0)))] + ] UNSPEC_ZIP1) 0)))] "TARGET_SIMD" - "usubw<PERM_EXTEND:perm_index>\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>" + "usubw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>" [(set_attr "type" "neon_sub_widen")] ) -(define_insn "aarch64_uaddw<mode>_<PERM_EXTEND:perm_hilo>_zip" +(define_insn "aarch64_uaddw<mode>_lo_zip" [(set (match_operand:<VWIDE> 0 "register_operand" "=w") (plus:<VWIDE> (subreg:<VWIDE> (unspec:<MODE> [ (match_operand:VQW 2 "register_operand" "w") (match_operand:VQW 3 "aarch64_simd_imm_zero") - ] PERM_EXTEND) 0) + ] UNSPEC_ZIP1) 0) (match_operand:<VWIDE> 1 "register_operand" "w")))] "TARGET_SIMD" - "uaddw<PERM_EXTEND:perm_index>\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>" + "uaddw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>" + [(set_attr "type" "neon_add_widen")] +) + +(define_insn "aarch64_usubw<mode>_hi_zip" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (minus:<VWIDE> + (match_operand:<VWIDE> 1 "register_operand" "w") + (subreg:<VWIDE> + (unspec:<MODE> [ + (match_operand:VQW 2 "register_operand" "w") + (match_operand:VQW 3 "aarch64_simd_imm_zero") + ] UNSPEC_ZIP2) 0)))] + "TARGET_SIMD" + "usubw2\\t%0.<Vwtype>, %1.<Vwtype>, 
%2.<Vtype>" + [(set_attr "type" "neon_sub_widen")] +) + +(define_insn "aarch64_uaddw<mode>_hi_zip" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (plus:<VWIDE> + (subreg:<VWIDE> + (unspec:<MODE> [ + (match_operand:VQW 2 "register_operand" "w") + (match_operand:VQW 3 "aarch64_simd_imm_zero") + ] UNSPEC_ZIP2) 0) + (match_operand:<VWIDE> 1 "register_operand" "w")))] + "TARGET_SIMD" + "uaddw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>" [(set_attr "type" "neon_add_widen")] ) diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 2354315d7d249ccee46625d13b32678f1da1f087..a920de99ffca378ce518f378a35cbe2766877ee8 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -2645,9 +2645,6 @@ (define_int_iterator PERMUTEQ [UNSPEC_ZIP1Q UNSPEC_ZIP2Q (define_int_iterator OPTAB_PERMUTE [UNSPEC_ZIP1 UNSPEC_ZIP2 UNSPEC_UZP1 UNSPEC_UZP2]) -;; Permutes for zero extends -(define_int_iterator PERM_EXTEND [UNSPEC_ZIP1 UNSPEC_ZIP2]) - (define_int_iterator REVERSE [UNSPEC_REV64 UNSPEC_REV32 UNSPEC_REV16]) (define_int_iterator FRINT [UNSPEC_FRINTZ UNSPEC_FRINTP UNSPEC_FRINTM @@ -3470,10 +3467,7 @@ (define_int_attr rev_op [(UNSPEC_REV64 "64") (UNSPEC_REV32 "32") (UNSPEC_REV16 "16")]) (define_int_attr perm_hilo [(UNSPEC_UNPACKSHI "hi") (UNSPEC_UNPACKUHI "hi") - (UNSPEC_UNPACKSLO "lo") (UNSPEC_UNPACKULO "lo") - (UNSPEC_ZIP2 "hi") (UNSPEC_ZIP1 "lo")]) - -(define_int_attr perm_index [(UNSPEC_ZIP2 "2") (UNSPEC_ZIP1 "")]) + (UNSPEC_UNPACKSLO "lo") (UNSPEC_UNPACKULO "lo")]) ;; Return true if the associated optab refers to the high-numbered lanes, ;; false if it refers to the low-numbered lanes. 
The convention is for diff --git a/gcc/testsuite/gcc.target/aarch64/uxtl-combine-4.c b/gcc/testsuite/gcc.target/aarch64/uxtl-combine-4.c index e1a9c4f5661a36ec7b2c5dc6f0fd85c42fcaac39..67944f70ecceff7ed833de86b76606547f3db76c 100755 --- a/gcc/testsuite/gcc.target/aarch64/uxtl-combine-4.c +++ b/gcc/testsuite/gcc.target/aarch64/uxtl-combine-4.c @@ -16,5 +16,5 @@ void d2 (SIGN TYPE2 * restrict a, SIGN TYPE1 *b, int n) /* { dg-final { scan-assembler-not {\tzip1\t} } } */ /* { dg-final { scan-assembler-not {\tzip2\t} } } */ /* { dg-final { scan-assembler-times {\tsxtl\t} 1 } } */ -/* { dg-final { scan-assembler-time {\tsxtl2\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tsxtl2\t} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/uxtl-combine-5.c b/gcc/testsuite/gcc.target/aarch64/uxtl-combine-5.c index 92b09ba4abba80f240ac175be2ef880968534975..e691c4f0b595d1a60b445415970c5a67d7dd0419 100755 --- a/gcc/testsuite/gcc.target/aarch64/uxtl-combine-5.c +++ b/gcc/testsuite/gcc.target/aarch64/uxtl-combine-5.c @@ -16,5 +16,5 @@ void d2 (SIGN TYPE2 * restrict a, SIGN TYPE1 *b, int n) /* { dg-final { scan-assembler-not {\tzip1\t} } } */ /* { dg-final { scan-assembler-not {\tzip2\t} } } */ /* { dg-final { scan-assembler-times {\tsxtl\t} 1 } } */ -/* { dg-final { scan-assembler-time {\tsxtl2\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tsxtl2\t} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/uxtl-combine-6.c b/gcc/testsuite/gcc.target/aarch64/uxtl-combine-6.c index 5c6e635f29d1e52f51f5b75a477f7d8744f32ca3..9383f7ebf9355ff471c48549fc0e4c07706601f1 100755 --- a/gcc/testsuite/gcc.target/aarch64/uxtl-combine-6.c +++ b/gcc/testsuite/gcc.target/aarch64/uxtl-combine-6.c @@ -16,5 +16,5 @@ void d2 (SIGN TYPE2 * restrict a, SIGN TYPE1 *b, int n) /* { dg-final { scan-assembler-not {\tzip1\t} } } */ /* { dg-final { scan-assembler-not {\tzip2\t} } } */ /* { dg-final { scan-assembler-times {\tsxtl\t} 1 } } */ -/* { dg-final { scan-assembler-time {\tsxtl2\t} 1 } } */ +/* { 
dg-final { scan-assembler-times {\tsxtl2\t} 1 } } */