We have ix86_expand_sse_fp_minmax to detect min/max sematics, but it requires rtx_equal_p for cmp_op0/cmp_op1 and if_true/if_false, for the testcase in the PR, there's an extra move from cmp_op0 to if_true, and it failed ix86_expand_sse_fp_minmax.
This patch adds pre_reload splitter to detect the min/max pattern. Operands order in MINSS matters for signed zero and NANs, since the instruction always returns second operand when any operand is NAN or both operands are zero. Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}. Ok for trunk? gcc/ChangeLog: PR target/110170 * config/i386/i386.md (*ieee_minmax<mode>3_1): New pre_reload splitter to detect fp min/max pattern. gcc/testsuite/ChangeLog: * g++.target/i386/pr110170.C: New test. * gcc.target/i386/pr110170.c: New test. --- gcc/config/i386/i386.md | 30 +++++++++ gcc/testsuite/g++.target/i386/pr110170.C | 78 ++++++++++++++++++++++++ gcc/testsuite/gcc.target/i386/pr110170.c | 18 ++++++ 3 files changed, 126 insertions(+) create mode 100644 gcc/testsuite/g++.target/i386/pr110170.C create mode 100644 gcc/testsuite/gcc.target/i386/pr110170.c diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index e6ebc461e52..353bb21993d 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -22483,6 +22483,36 @@ (define_insn "*ieee_s<ieee_maxmin><mode>3" (set_attr "type" "sseadd") (set_attr "mode" "<MODE>")]) +;; Operands order in min/max instruction matters for signed zero and NANs. +(define_insn_and_split "*ieee_minmax<mode>3_1" + [(set (match_operand:MODEF 0 "register_operand") + (unspec:MODEF + [(match_operand:MODEF 1 "register_operand") + (match_operand:MODEF 2 "register_operand") + (lt:MODEF + (match_operand:MODEF 3 "register_operand") + (match_operand:MODEF 4 "register_operand"))] + UNSPEC_BLENDV))] + "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH + && ((rtx_equal_p (operands[1], operands[3]) + && rtx_equal_p (operands[2], operands[4])) + || (rtx_equal_p (operands[1], operands[4]) + && rtx_equal_p (operands[2], operands[3]))) + && ix86_pre_reload_split ()" + "#" + "&& 1" + [(const_int 0)] +{ + int u = (rtx_equal_p (operands[1], operands[3]) + && rtx_equal_p (operands[2], operands[4])) + ? UNSPEC_IEEE_MAX : UNSPEC_IEEE_MIN; + emit_move_insn (operands[0], + gen_rtx_UNSPEC (<MODE>mode, + gen_rtvec (2, operands[2], operands[1]), + u)); + DONE; +}) + ;; Make two stack loads independent: ;; fld aa fld aa ;; fld %st(0) -> fld bb diff --git a/gcc/testsuite/g++.target/i386/pr110170.C b/gcc/testsuite/g++.target/i386/pr110170.C new file mode 100644 index 00000000000..1e9a781ca74 --- /dev/null +++ b/gcc/testsuite/g++.target/i386/pr110170.C @@ -0,0 +1,78 @@ +/* { dg-do run } */ +/* { dg-options " -O2 -march=x86-64 -mfpmath=sse -std=gnu++20" } */ +#include <math.h> + +void +__attribute__((noinline)) +__cond_swap(double* __x, double* __y) { + bool __r = (*__x < *__y); + auto __tmp = __r ? *__x : *__y; + *__y = __r ? *__y : *__x; + *__x = __tmp; +} + +auto test1() { + double nan = -0.0; + double x = 0.0; + __cond_swap(&nan, &x); + return x == -0.0 && nan == 0.0; +} + +auto test1r() { + double nan = NAN; + double x = 1.0; + __cond_swap(&x, &nan); + return isnan(x) && signbit(x) == 0 && nan == 1.0; +} + +auto test2() { + double nan = NAN; + double x = -1.0; + __cond_swap(&nan, &x); + return isnan(x) && signbit(x) == 0 && nan == -1.0; +} + +auto test2r() { + double nan = NAN; + double x = -1.0; + __cond_swap(&x, &nan); + return isnan(x) && signbit(x) == 0 && nan == -1.0; +} + +auto test3() { + double nan = -NAN; + double x = 1.0; + __cond_swap(&nan, &x); + return isnan(x) && signbit(x) == 1 && nan == 1.0; +} + +auto test3r() { + double nan = -NAN; + double x = 1.0; + __cond_swap(&x, &nan); + return isnan(x) && signbit(x) == 1 && nan == 1.0; +} + +auto test4() { + double nan = -NAN; + double x = -1.0; + __cond_swap(&nan, &x); + return isnan(x) && signbit(x) == 1 && nan == -1.0; +} + +auto test4r() { + double nan = -NAN; + double x = -1.0; + __cond_swap(&x, &nan); + return isnan(x) && signbit(x) == 1 && nan == -1.0; +} + + +int main() { + if ( + !test1() || !test1r() + || !test2() || !test2r() + || !test3() || !test4r() + || !test4() || !test4r() + ) __builtin_abort(); +} diff --git a/gcc/testsuite/gcc.target/i386/pr110170.c b/gcc/testsuite/gcc.target/i386/pr110170.c new file mode 100644 index 00000000000..0f98545cce3 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr110170.c @@ -0,0 +1,18 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options " -O2 -march=x86-64-v2 -mfpmath=sse" } */ +/* { dg-final { scan-assembler-times {(?n)mins[sd]} 2 } } */ +/* { dg-final { scan-assembler-times {(?n)maxs[sd]} 2 } } */ + +void __cond_swap_df(double* __x, double* __y) { + _Bool __r = (*__x < *__y); + double __tmp = __r ? *__x : *__y; + *__y = __r ? *__y : *__x; + *__x = __tmp; +} + +void __cond_swap_sf(float* __x, float* __y) { + _Bool __r = (*__x < *__y); + float __tmp = __r ? *__x : *__y; + *__y = __r ? *__y : *__x; + *__x = __tmp; +} -- 2.39.1.388.g2fc9e9ca3c