On Wed, Nov 13, 2024 at 10:00 AM Hongyu Wang <hongyu.w...@intel.com> wrote:
>
> Hi,
>
> cstorebf4 uses comparison_operator for the BFmode compare, which is
> incorrect when it calls ix86_expand_setcc directly, because that function
> does not canonicalize the input comparison by swapping operands to correct
> the compare code.
> The original code without AVX10.2 calls emit_store_flag_force, which
> actually calls emit_store_flag_1 and recursively calls this expander
> again with swapped operands and flag.
> Therefore, we can avoid the redundant recursive call by changing
> comparison_operator to ix86_fp_comparison_operator and calling
> ix86_expand_setcc directly.
>
> Bootstrapped & regtested on x86-64-pc-linux-gnu.
>
> Ok for trunk?
Ok.
>
> gcc/ChangeLog:
>
>         PR target/117495
>         * config/i386/i386.md (cstorebf4): Use ix86_fp_comparison_operator
>         and call ix86_expand_setcc directly.
>
> gcc/testsuite/ChangeLog:
>
>         PR target/117495
>         * gcc.target/i386/pr117495.c: New test.
> ---
>  gcc/config/i386/i386.md                  | 18 +++++++---------
>  gcc/testsuite/gcc.target/i386/pr117495.c | 26 ++++++++++++++++++++++++
>  2 files changed, 33 insertions(+), 11 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr117495.c
>
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> index f4aae80b7a9..335b2f74217 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -1853,23 +1853,19 @@ (define_expand "cstorebf4"
>         (compare:CC (match_operand:BF 2 "cmp_fp_expander_operand")
>                     (match_operand:BF 3 "cmp_fp_expander_operand")))
>     (set (match_operand:QI 0 "register_operand")
> -       (match_operator 1 "comparison_operator"
> +       (match_operator 1 "ix86_fp_comparison_operator"
>           [(reg:CC FLAGS_REG)
>            (const_int 0)]))]
>    "TARGET_80387 || (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH)"
>  {
> -  if (TARGET_AVX10_2_256 && !flag_trapping_math)
> -    ix86_expand_setcc (operands[0], GET_CODE (operands[1]),
> -                      operands[2], operands[3]);
> -  else
> +  rtx op2 = operands[2], op3 = operands[3];
> +  if (!TARGET_AVX10_2_256 || flag_trapping_math)
>      {
> -      rtx op1 = ix86_expand_fast_convert_bf_to_sf (operands[2]);
> -      rtx op2 = ix86_expand_fast_convert_bf_to_sf (operands[3]);
> -      rtx res = emit_store_flag_force (operands[0], GET_CODE (operands[1]),
> -                                      op1, op2, SFmode, 0, 1);
> -      if (!rtx_equal_p (res, operands[0]))
> -      emit_move_insn (operands[0], res);
> +      op2 = ix86_expand_fast_convert_bf_to_sf (operands[2]);
> +      op3 = ix86_expand_fast_convert_bf_to_sf (operands[3]);
>      }
> +  ix86_expand_setcc (operands[0], GET_CODE (operands[1]),
> +                    op2, op3);
>    DONE;
>  })
>
> diff --git a/gcc/testsuite/gcc.target/i386/pr117495.c b/gcc/testsuite/gcc.target/i386/pr117495.c
> new file mode 100644
> index 00000000000..274b6cef361
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr117495.c
> @@ -0,0 +1,26 @@
> +/* PR target/117495 */
> +/* { dg-do compile } */
> +/* { dg-options "-march=x86-64-v3 -fno-trapping-math" } */
> +/* { dg-final { scan-assembler-times "vcomsbf16" 2 } } */
> +
> +__attribute__((target("avx10.2")))
> +int foo (int b, int x)
> +{
> +  return (__bf16) b < x;
> +}
> +
> +int foo2 (int b, int x)
> +{
> +  return (__bf16) b < x;
> +}
> +
> +__attribute__((target("avx10.2")))
> +int foo3 (__bf16 b, __bf16 x)
> +{
> +  return (__bf16) b < x;
> +}
> +
> +int foo4 (__bf16 b, __bf16 x)
> +{
> +  return (__bf16) b < x;
> +}
> --
> 2.31.1
>


-- 
BR,
Hongtao

Reply via email to