Tamar Christina <tamar.christ...@arm.com> writes: >> >> > + "&& TARGET_SVE && rtx_equal_p (operands[0], operands[1]) >> >> > + && satisfies_constraint_<lconst> (operands[2]) >> >> > + && FP_REGNUM_P (REGNO (operands[0]))" >> >> > + [(const_int 0)] >> >> > + { >> >> > + rtx op1 = lowpart_subreg (<VCONV>mode, operands[1], >> <MODE>mode); >> >> > + rtx op2 = gen_const_vec_duplicate (<VCONV>mode, operands[2]); >> >> > + emit_insn (gen_<optab><vconv>3 (op1, op1, op2)); >> >> > + DONE; >> >> > + } >> >> > ) >> >> >> >> The WIP SME patches add a %Z modifier for 'z' register prefixes, >> >> similarly to b/h/s/d for scalar FP. With that I think the alternative >> >> can be: >> >> >> >> [w , 0 , <lconst>; * , sve ] <logical>\t%Z0.<s>, %Z0.<s>, #%2 >> >> >> >> although it would be nice to keep the hex constant. >> > >> > My original patch added a %u for (undecorated) which just prints the >> > register number and changed %C to also accept a single constant instead of >> only a uniform vector. >> >> Not saying no to %u in future, but %Z seems more consistent with the current >> approach. And yeah, I'd also wondered about extending %C. >> The problem is guessing whether to print a 32-bit, 64-bit or 128-bit constant >> for negative immediates. >> > > Rebased patch, > > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues. > > Ok for master? > > Thanks, > Tamar > > gcc/ChangeLog: > > PR tree-optimization/109154 > * config/aarch64/aarch64.md (<optab><mode>3): Add SVE case. > * config/aarch64/aarch64-simd.md (ior<mode>3<vczle><vczbe>): Likewise. > * config/aarch64/iterators.md (VCONV, vconv): New. > * config/aarch64/predicates.md(aarch64_orr_imm_sve_advsimd): New. > > gcc/testsuite/ChangeLog: > > PR tree-optimization/109154 > * gcc.target/aarch64/sve/fneg-abs_1.c: Updated. > * gcc.target/aarch64/sve/fneg-abs_2.c: Updated. > * gcc.target/aarch64/sve/fneg-abs_4.c: Updated. 
> > --- inline copy of patch -- > > diff --git a/gcc/config/aarch64/aarch64-simd.md > b/gcc/config/aarch64/aarch64-simd.md > index > 33eceb436584ff73c7271f93639f2246d1af19e0..98c418c54a82a348c597310caa23916f9c16f9b6 > 100644 > --- a/gcc/config/aarch64/aarch64-simd.md > +++ b/gcc/config/aarch64/aarch64-simd.md > @@ -1219,11 +1219,14 @@ (define_insn "and<mode>3<vczle><vczbe>" > (define_insn "ior<mode>3<vczle><vczbe>" > [(set (match_operand:VDQ_I 0 "register_operand") > (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand") > - (match_operand:VDQ_I 2 "aarch64_reg_or_orr_imm")))] > - "TARGET_SIMD" > - {@ [ cons: =0 , 1 , 2 ] > - [ w , w , w ] orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype> > - [ w , 0 , Do ] << aarch64_output_simd_mov_immediate > (operands[2], <bitsize>, AARCH64_CHECK_ORR); > + (match_operand:VDQ_I 2 "aarch64_orr_imm_sve_advsimd")))] > + "TARGET_SIMD" > + {@ [ cons: =0 , 1 , 2; attrs: arch ] > + [ w , w , w ; simd ] orr\t%0.<Vbtype>, %1.<Vbtype>, > %2.<Vbtype> > + [ w , 0 , vsl; sve ] orr\t%Z0.<Vetype>, %Z0.<Vetype>, #%2 > + [ w , 0 , Do ; simd ] \ > + << aarch64_output_simd_mov_immediate (operands[2], <bitsize>, \ > + AARCH64_CHECK_ORR); > } > [(set_attr "type" "neon_logic<q>")] > ) > diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md > index > 4fcd71a2e9d1e8c35f35593255c4f66a68856a79..c6b1506fe7b47dd40741f26ef0cc92692008a631 > 100644 > --- a/gcc/config/aarch64/aarch64.md > +++ b/gcc/config/aarch64/aarch64.md > @@ -4599,7 +4599,8 @@ (define_insn "<optab><mode>3" > "" > {@ [ cons: =0 , 1 , 2 ; attrs: type , arch ] > [ r , %r , r ; logic_reg , * ] <logical>\t%<w>0, > %<w>1, %<w>2 > - [ rk , r , <lconst> ; logic_imm , * ] <logical>\t%<w>0, > %<w>1, %2 > + [ rk , ^r , <lconst> ; logic_imm , * ] <logical>\t%<w>0, > %<w>1, %2 > + [ w , 0 , <lconst> ; * , sve ] <logical>\t%Z0.<s>, > %Z0.<s>, #%2 > [ w , w , w ; neon_logic , simd ] > <logical>\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype> > } > ) > diff --git a/gcc/config/aarch64/iterators.md 
b/gcc/config/aarch64/iterators.md > index > 1593a8fd04f91259295d0e393cbc7973daf7bf73..d24109b4fe6a867125b9474d34d616155bc36b3f > 100644 > --- a/gcc/config/aarch64/iterators.md > +++ b/gcc/config/aarch64/iterators.md > @@ -1435,6 +1435,19 @@ (define_mode_attr VCONQ [(V8QI "V16QI") (V16QI "V16QI") > (HI "V8HI") (QI "V16QI") > (SF "V4SF") (DF "V2DF")]) > > +;; 128-bit container modes for the lower part of an SVE vector to the inner > or > +;; neon source mode. > +(define_mode_attr VCONV [(SI "VNx4SI") (DI "VNx2DI") > + (V8QI "VNx16QI") (V16QI "VNx16QI") > + (V4HI "VNx8HI") (V8HI "VNx8HI") > + (V2SI "VNx4SI") (V4SI "VNx4SI") > + (V2DI "VNx2DI")]) > +(define_mode_attr vconv [(SI "vnx4si") (DI "vnx2di") > + (V8QI "vnx16qi") (V16QI "vnx16qi") > + (V4HI "vnx8hi") (V8HI "vnx8hi") > + (V2SI "vnx4si") (V4SI "vnx4si") > + (V2DI "vnx2di")]) > + > ;; Half modes of all vector modes. > (define_mode_attr VHALF [(V8QI "V4QI") (V16QI "V8QI") > (V4HI "V2HI") (V8HI "V4HI")
These attributes aren't needed any more (at least, not by this patch). OK for trunk with those removed. Thanks, Richard > diff --git a/gcc/config/aarch64/predicates.md > b/gcc/config/aarch64/predicates.md > index > 01de47439744acb3708c645b98eaa607294a1f1f..a73724a7fc05636d4c0643a291f40f2609564778 > 100644 > --- a/gcc/config/aarch64/predicates.md > +++ b/gcc/config/aarch64/predicates.md > @@ -871,6 +871,11 @@ (define_predicate "aarch64_sve_logical_operand" > (ior (match_operand 0 "register_operand") > (match_operand 0 "aarch64_sve_logical_immediate"))) > > +(define_predicate "aarch64_orr_imm_sve_advsimd" > + (ior (match_operand 0 "aarch64_reg_or_orr_imm") > + (and (match_test "TARGET_SVE") > + (match_operand 0 "aarch64_sve_logical_operand")))) > + > (define_predicate "aarch64_sve_gather_offset_b" > (ior (match_operand 0 "register_operand") > (match_operand 0 "aarch64_sve_gather_immediate_b"))) > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c > b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c > index > 0c7664e6de77a497682952653ffd417453854d52..a8b27199ff83d0eebadfc7dcf03f94e1229d76b8 > 100644 > --- a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c > +++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c > @@ -6,7 +6,7 @@ > > /* > ** t1: > -** orr v[0-9]+.2s, #128, lsl #24 > +** orr z[0-9]+.s, z[0-9]+.s, #-2147483648 > ** ret > */ > float32x2_t t1 (float32x2_t a) > @@ -16,7 +16,7 @@ float32x2_t t1 (float32x2_t a) > > /* > ** t2: > -** orr v[0-9]+.4s, #128, lsl #24 > +** orr z[0-9]+.s, z[0-9]+.s, #-2147483648 > ** ret > */ > float32x4_t t2 (float32x4_t a) > @@ -26,9 +26,7 @@ float32x4_t t2 (float32x4_t a) > > /* > ** t3: > -** adrp x0, .LC[0-9]+ > -** ldr q[0-9]+, \[x0, #:lo12:.LC0\] > -** orr v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b > +** orr z[0-9]+.d, z[0-9]+.d, #-9223372036854775808 > ** ret > */ > float64x2_t t3 (float64x2_t a) > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c > b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c
> index > a60cd31b9294af2dac69eed1c93f899bd5c78fca..19a7695e605bc8aced486a9c450d1cdc6be4691a > 100644 > --- a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c > +++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c > @@ -7,8 +7,7 @@ > > /* > ** f1: > -** movi v[0-9]+.2s, 0x80, lsl 24 > -** orr v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b > +** orr z0.s, z0.s, #-2147483648 > ** ret > */ > float32_t f1 (float32_t a) > @@ -18,9 +17,7 @@ float32_t f1 (float32_t a) > > /* > ** f2: > -** mov x0, -9223372036854775808 > -** fmov d[0-9]+, x0 > -** orr v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b > +** orr z0.d, z0.d, #-9223372036854775808 > ** ret > */ > float64_t f2 (float64_t a) > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_4.c > b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_4.c > index > 21f2a8da2a5d44e3d01f6604ca7be87e3744d494..663d5fe17e091d128313b6b8b8dc918a01a96c4f > 100644 > --- a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_4.c > +++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_4.c > @@ -6,9 +6,7 @@ > > /* > ** negabs: > -** mov x0, -9223372036854775808 > -** fmov d[0-9]+, x0 > -** orr v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b > +** orr z0.d, z0.d, #-9223372036854775808 > ** ret > */ > double negabs (double x) > @@ -22,8 +20,7 @@ double negabs (double x) > > /* > ** negabsf: > -** movi v[0-9]+.2s, 0x80, lsl 24 > -** orr v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b > +** orr z0.s, z0.s, #-2147483648 > ** ret > */ > float negabsf (float x)