> Please split the above pattern into two, one emitting UNSPEC_IEEE_MAX
> and the other emitting UNSPEC_IEEE_MIN.
Split.
> The test involves blendv instruction, which is SSE4.1, so it is
> pointless to test it without -msse4.1. Please add -msse4.1 instead of
> -march=x86_64 and use sse4_runtime target selector, as is the case
> with gcc.target/i386/pr90358.c.
Changed.
> Please also use -msse4.1 instead of -march here. With -mfpmath=sse,
> the test is valid also for 32bit targets, you should use -msseregparm
> additional options for ia32 (please see gcc.target/i386/pr43546.c
> testcase) in the same way as -mregparm to pass SSE arguments in
> registers.
The 32-bit target still fails to do condition elimination for DFmode due to
the code below in rtx_cost:
/* A size N times larger than UNITS_PER_WORD likely needs N times as
many insns, taking N times as long. */
factor = mode_size > UNITS_PER_WORD ? mode_size / UNITS_PER_WORD : 1;
It looks like a separate issue for DFmode operations on 32-bit targets.
I've enabled the testcase for 32-bit, but it currently only scans for
minss/maxss there.
Here's the updated patch.
Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
Ok for trunk?
We have ix86_expand_sse_fp_minmax to detect min/max semantics, but
it requires rtx_equal_p for cmp_op0/cmp_op1 and if_true/if_false. For
the testcase in the PR, there's an extra move from cmp_op0 to if_true,
so ix86_expand_sse_fp_minmax fails to match.
This patch adds pre_reload splitter to detect the min/max pattern.
Operand order in MINSS matters for signed zeros and NaNs, since the
instruction always returns the second operand when either operand is a NaN
or both operands are zero.
gcc/ChangeLog:
PR target/110170
* config/i386/i386.md (*ieee_max<mode>3_1): New pre_reload
splitter to detect fp max pattern.
(*ieee_min<mode>3_1): Ditto, but for fp min pattern.
gcc/testsuite/ChangeLog:
* g++.target/i386/pr110170.C: New test.
* gcc.target/i386/pr110170.c: New test.
---
gcc/config/i386/i386.md | 43 +++++++++++++
gcc/testsuite/g++.target/i386/pr110170.C | 78 ++++++++++++++++++++++++
gcc/testsuite/gcc.target/i386/pr110170.c | 21 +++++++
3 files changed, 142 insertions(+)
create mode 100644 gcc/testsuite/g++.target/i386/pr110170.C
create mode 100644 gcc/testsuite/gcc.target/i386/pr110170.c
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index a82cc353cfd..6f415f899ae 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -23163,6 +23163,49 @@ (define_insn "*ieee_s<ieee_maxmin><mode>3"
(set_attr "type" "sseadd")
(set_attr "mode" "<MODE>")])
+;; Operands order in min/max instruction matters for signed zero and NANs.
+(define_insn_and_split "*ieee_max<mode>3_1"
+ [(set (match_operand:MODEF 0 "register_operand")
+ (unspec:MODEF
+ [(match_operand:MODEF 1 "register_operand")
+ (match_operand:MODEF 2 "register_operand")
+ (lt:MODEF
+ (match_operand:MODEF 3 "register_operand")
+ (match_operand:MODEF 4 "register_operand"))]
+ UNSPEC_BLENDV))]
+ "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+ && (rtx_equal_p (operands[1], operands[3])
+ && rtx_equal_p (operands[2], operands[4]))
+ && ix86_pre_reload_split ()"
+ "#"
+ "&& 1"
+ [(set (match_dup 0)
+ (unspec:MODEF
+ [(match_dup 2)
+ (match_dup 1)]
+ UNSPEC_IEEE_MAX))])
+
+(define_insn_and_split "*ieee_min<mode>3_1"
+ [(set (match_operand:MODEF 0 "register_operand")
+ (unspec:MODEF
+ [(match_operand:MODEF 1 "register_operand")
+ (match_operand:MODEF 2 "register_operand")
+ (lt:MODEF
+ (match_operand:MODEF 3 "register_operand")
+ (match_operand:MODEF 4 "register_operand"))]
+ UNSPEC_BLENDV))]
+ "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+ && (rtx_equal_p (operands[1], operands[4])
+ && rtx_equal_p (operands[2], operands[3]))
+ && ix86_pre_reload_split ()"
+ "#"
+ "&& 1"
+ [(set (match_dup 0)
+ (unspec:MODEF
+ [(match_dup 2)
+ (match_dup 1)]
+ UNSPEC_IEEE_MIN))])
+
;; Make two stack loads independent:
;; fld aa fld aa
;; fld %st(0) -> fld bb
diff --git a/gcc/testsuite/g++.target/i386/pr110170.C b/gcc/testsuite/g++.target/i386/pr110170.C
new file mode 100644
index 00000000000..5d6842270d0
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr110170.C
@@ -0,0 +1,78 @@
+/* { dg-do run } */
+/* { dg-options " -O2 -msse4.1 -mfpmath=sse -std=gnu++20" } */
+#include <math.h>
+
+void
+__attribute__((noinline))
+__cond_swap(double* __x, double* __y) {
+ bool __r = (*__x < *__y);
+ auto __tmp = __r ? *__x : *__y;
+ *__y = __r ? *__y : *__x;
+ *__x = __tmp;
+}
+
+auto test1() { // Signed zeros: -0.0 < 0.0 is false, so the two values get exchanged.
+ double nan = -0.0;
+ double x = 0.0;
+ __cond_swap(&nan, &x);
+ return x == 0.0 && signbit(x) == 1 && nan == 0.0 && signbit(nan) == 0; // == alone cannot tell -0.0 from 0.0
+}
+
+auto test1r() {
+ double nan = NAN;
+ double x = 1.0;
+ __cond_swap(&x, &nan);
+ return isnan(x) && signbit(x) == 0 && nan == 1.0;
+}
+
+auto test2() {
+ double nan = NAN;
+ double x = -1.0;
+ __cond_swap(&nan, &x);
+ return isnan(x) && signbit(x) == 0 && nan == -1.0;
+}
+
+auto test2r() {
+ double nan = NAN;
+ double x = -1.0;
+ __cond_swap(&x, &nan);
+ return isnan(x) && signbit(x) == 0 && nan == -1.0;
+}
+
+auto test3() {
+ double nan = -NAN;
+ double x = 1.0;
+ __cond_swap(&nan, &x);
+ return isnan(x) && signbit(x) == 1 && nan == 1.0;
+}
+
+auto test3r() {
+ double nan = -NAN;
+ double x = 1.0;
+ __cond_swap(&x, &nan);
+ return isnan(x) && signbit(x) == 1 && nan == 1.0;
+}
+
+auto test4() {
+ double nan = -NAN;
+ double x = -1.0;
+ __cond_swap(&nan, &x);
+ return isnan(x) && signbit(x) == 1 && nan == -1.0;
+}
+
+auto test4r() {
+ double nan = -NAN;
+ double x = -1.0;
+ __cond_swap(&x, &nan);
+ return isnan(x) && signbit(x) == 1 && nan == -1.0;
+}
+
+
+int main() { // Run every check and abort if any of them fails.
+ if (
+ !test1() || !test1r()
+ || !test2() || !test2r()
+ || !test3() || !test3r() // each testN is paired with its swapped-argument testNr
+ || !test4() || !test4r()
+ ) __builtin_abort();
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr110170.c b/gcc/testsuite/gcc.target/i386/pr110170.c
new file mode 100644
index 00000000000..c72f73398a1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr110170.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options " -O2 -msse4.1 -mfpmath=sse" } */
+/* { dg-final { scan-assembler-times {(?n)mins[sd]} 2 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times {(?n)maxs[sd]} 2 { target { ! ia32 } } } } */
+/* Ideally cond_swap_df is also optimized to minsd/maxsd. */
+/* { dg-final { scan-assembler-times {(?n)mins[sd]} 1 { target ia32 } } } */
+/* { dg-final { scan-assembler-times {(?n)maxs[sd]} 1 { target ia32 } } } */
+
+void __cond_swap_df(double* __x, double* __y) {
+ _Bool __r = (*__x < *__y);
+ double __tmp = __r ? *__x : *__y;
+ *__y = __r ? *__y : *__x;
+ *__x = __tmp;
+}
+
+void __cond_swap_sf(float* __x, float* __y) {
+ _Bool __r = (*__x < *__y);
+ float __tmp = __r ? *__x : *__y;
+ *__y = __r ? *__y : *__x;
+ *__x = __tmp;
+}
--
2.39.1.388.g2fc9e9ca3c