commit 77473a27bae04da99d6979d43e7bd0a8106f4557
Author: H.J. Lu <hjl.to...@gmail.com>
Date:   Thu Jun 26 06:08:51 2025 +0800

    x86: Also handle all 1s float vector constant

replaces

(insn 29 28 30 5 (set (reg:V2SF 107)
        (mem/u/c:V2SF (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0  S8 A64])) 2031
 {*movv2sf_internal}
     (expr_list:REG_EQUAL (const_vector:V2SF [
                (const_double:SF -QNaN [-QNaN]) repeated x2
            ])
        (nil)))

with

(insn 98 13 14 3 (set (reg:V8QI 112)
        (const_vector:V8QI [
                (const_int -1 [0xffffffffffffffff]) repeated x8
            ])) -1
     (nil))
...
(insn 29 28 30 5 (set (reg:V2SF 107)
        (subreg:V2SF (reg:V8QI 112) 0)) 2031 {*movv2sf_internal}
     (expr_list:REG_EQUAL (const_vector:V2SF [
                (const_double:SF -QNaN [-QNaN]) repeated x2
            ])
        (nil)))

which leads to

pr121015.c: In function ‘render_result_from_bake_h’:
pr121015.c:34:1: error: unrecognizable insn:
   34 | }
      | ^
(insn 98 13 14 3 (set (reg:V8QI 112)
        (const_vector:V8QI [
                (const_int -1 [0xffffffffffffffff]) repeated x8
            ])) -1
     (expr_list:REG_EQUIV (const_vector:V8QI [
                (const_int -1 [0xffffffffffffffff]) repeated x8
            ])
        (nil)))
during RTL pass: ira

1. Add vector_const0_or_m1_operand for vector 0 or integer vector -1.
2. Add nonimm_or_vector_const0_or_m1_operand for nonimmediate, vector 0
or integer vector -1 operand.
3. Add BX constraint for MMX vector constant all 0s/1s operand.
4. Update MMXMODE:*mov<mode>_internal to support integer all 1s vectors.
Replace <v,C> with <v,BX> to generate

pcmpeqd %xmm0, %xmm0

for

(set (reg/i:V8QI 20 xmm0)
     (const_vector:V8QI [(const_int -1 [0xffffffffffffffff]) repeated x8]))

NB: The upper 64 bits in XMM0 are all 1s, instead of all 0s.

5. Update the 32-bit MMXMODE move splitter to also split an all 1s vector
source operand.

gcc/

PR target/121015
* config/i386/constraints.md (BX): New constraint.
* config/i386/i386.cc (ix86_print_operand): Support CONSTM1_RTX.
* config/i386/mmx.md (MMXMODE:*mov<mode>_internal): Replace C with
BX for memory and integer register destination.  Replace <v,C>
with <v,BX>.
Update 32-bit MMXMODE move splitter to also split all 1s vector
source operand.
* config/i386/predicates.md (vector_const0_or_m1_operand): New
predicate.
(nonimm_or_vector_const0_or_m1_operand): Likewise.

gcc/testsuite/

PR target/121015
* gcc.target/i386/pr106022-2.c: Adjust.
* gcc.target/i386/pr121015-1.c: New test.
* gcc.target/i386/pr121015-2.c: Likewise.
* gcc.target/i386/pr121015-3.c: Likewise.
* gcc.target/i386/pr121015-4.c: Likewise.
* gcc.target/i386/pr121015-5.c: Likewise.
* gcc.target/i386/pr121015-6.c: Likewise.

OK for master?

Thanks.

-- 
H.J.
From a6e90a205d69f0a192fb885a96bbbe0f8e7c7819 Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.to...@gmail.com>
Date: Thu, 10 Jul 2025 06:21:58 +0800
Subject: [PATCH v2] x86: Update MMXMODE:*mov<mode>_internal to support all 1s
 vectors
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 77473a27bae04da99d6979d43e7bd0a8106f4557
Author: H.J. Lu <hjl.to...@gmail.com>
Date:   Thu Jun 26 06:08:51 2025 +0800

    x86: Also handle all 1s float vector constant

replaces

(insn 29 28 30 5 (set (reg:V2SF 107)
        (mem/u/c:V2SF (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0  S8 A64])) 2031 {*movv2sf_internal}
     (expr_list:REG_EQUAL (const_vector:V2SF [
                (const_double:SF -QNaN [-QNaN]) repeated x2
            ])
        (nil)))

with

(insn 98 13 14 3 (set (reg:V8QI 112)
        (const_vector:V8QI [
                (const_int -1 [0xffffffffffffffff]) repeated x8
            ])) -1
     (nil))
...
(insn 29 28 30 5 (set (reg:V2SF 107)
        (subreg:V2SF (reg:V8QI 112) 0)) 2031 {*movv2sf_internal}
     (expr_list:REG_EQUAL (const_vector:V2SF [
                (const_double:SF -QNaN [-QNaN]) repeated x2
            ])
        (nil)))

which leads to

pr121015.c: In function ‘render_result_from_bake_h’:
pr121015.c:34:1: error: unrecognizable insn:
   34 | }
      | ^
(insn 98 13 14 3 (set (reg:V8QI 112)
        (const_vector:V8QI [
                (const_int -1 [0xffffffffffffffff]) repeated x8
            ])) -1
     (expr_list:REG_EQUIV (const_vector:V8QI [
                (const_int -1 [0xffffffffffffffff]) repeated x8
            ])
        (nil)))
during RTL pass: ira

1. Add vector_const0_or_m1_operand for vector 0 or integer vector -1.
2. Add nonimm_or_vector_const0_or_m1_operand for nonimmediate, vector 0
or integer vector -1 operand.
3. Add BX constraint for MMX vector constant all 0s/1s operand.
4. Update MMXMODE:*mov<mode>_internal to support integer all 1s vectors.
Replace <v,C> with <v,BX> to generate

pcmpeqd	%xmm0, %xmm0

for

(set (reg/i:V8QI 20 xmm0)
     (const_vector:V8QI [(const_int -1 [0xffffffffffffffff]) repeated x8]))

NB: The upper 64 bits in XMM0 are all 1s, instead of all 0s.

5. Update the 32-bit MMXMODE move splitter to also split an all 1s vector
source operand.

gcc/

	PR target/121015
	* config/i386/constraints.md (BX): New constraint.
	* config/i386/i386.cc (ix86_print_operand): Support CONSTM1_RTX.
	* config/i386/mmx.md (MMXMODE:*mov<mode>_internal): Replace C with
	BX for memory and integer register destination.  Replace <v,C>
	with <v,BX>.
	Update 32-bit MMXMODE move splitter to also split all 1s vector
	source operand.
	* config/i386/predicates.md (vector_const0_or_m1_operand): New
	predicate.
	(nonimm_or_vector_const0_or_m1_operand): Likewise.

gcc/testsuite/

	PR target/121015
	* gcc.target/i386/pr106022-2.c: Adjust.
	* gcc.target/i386/pr121015-1.c: New test.
	* gcc.target/i386/pr121015-2.c: Likewise.
	* gcc.target/i386/pr121015-3.c: Likewise.
	* gcc.target/i386/pr121015-4.c: Likewise.
	* gcc.target/i386/pr121015-5.c: Likewise.
	* gcc.target/i386/pr121015-6.c: Likewise.

Signed-off-by: H.J. Lu <hjl.to...@gmail.com>
---
 gcc/config/i386/constraints.md             |  6 ++++
 gcc/config/i386/i386.cc                    | 11 +++++--
 gcc/config/i386/mmx.md                     | 13 ++++----
 gcc/config/i386/predicates.md              | 12 ++++++++
 gcc/testsuite/gcc.target/i386/pr106022-2.c |  4 +--
 gcc/testsuite/gcc.target/i386/pr121015-1.c | 34 +++++++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr121015-2.c | 14 +++++++++
 gcc/testsuite/gcc.target/i386/pr121015-3.c | 35 ++++++++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr121015-4.c | 22 ++++++++++++++
 gcc/testsuite/gcc.target/i386/pr121015-5.c | 21 +++++++++++++
 gcc/testsuite/gcc.target/i386/pr121015-6.c | 23 ++++++++++++++
 11 files changed, 185 insertions(+), 10 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr121015-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr121015-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr121015-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr121015-4.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr121015-5.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr121015-6.c

diff --git a/gcc/config/i386/constraints.md b/gcc/config/i386/constraints.md
index 38877a7e61b..0d563298857 100644
--- a/gcc/config/i386/constraints.md
+++ b/gcc/config/i386/constraints.md
@@ -174,6 +174,7 @@ (define_register_constraint "YW"
 ;;     and zero-extand to 256/512bit, or 128bit all ones
 ;;     and zero-extend to 512bit.
 ;;  M  x86-64 memory operand.
+;;  X  MMX vector constant with all 0s/1s operand.
 
 (define_constraint "Bf"
   "@internal Flags register operand."
@@ -246,6 +247,11 @@ (define_constraint "BM"
        (match_test "memory_address_addr_space_p (GET_MODE (op), XEXP (op, 0),
 						 MEM_ADDR_SPACE (op))")))
 
+(define_constraint "BX"
+  "@internal MMX vector constant all 0s/1s operand."
+  (and (match_test "TARGET_MMX || TARGET_MMX_WITH_SSE")
+       (match_operand 0 "vector_const0_or_m1_operand")))
+
 ;; Integer constant constraints.
 (define_constraint "Wb"
   "Integer constant in the range 0 @dots{} 7, for 8-bit shifts."
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index ad7360ec71a..8d5e9dab8e8 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -14671,9 +14671,14 @@ ix86_print_operand (FILE *file, rtx x, int code)
 	 since we can in fact encode that into an immediate.  */
       if (GET_CODE (x) == CONST_VECTOR)
 	{
-	  if (x != CONST0_RTX (GET_MODE (x)))
-	    output_operand_lossage ("invalid vector immediate");
-	  x = const0_rtx;
+	  if (x == CONSTM1_RTX (GET_MODE (x)))
+	    x = constm1_rtx;
+	  else
+	    {
+	      if (x != CONST0_RTX (GET_MODE (x)))
+		output_operand_lossage ("invalid vector immediate");
+	      x = const0_rtx;
+	    }
 	}
 
       if (code == 'P')
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 79202323e53..1dab0d77af3 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -183,9 +183,9 @@ (define_expand "mov<mode>"
 
 (define_insn "*mov<mode>_internal"
   [(set (match_operand:MMXMODE 0 "nonimmediate_operand"
-    "=r ,o ,r,r ,m ,?!y,!y,?!y,m  ,r  ,?!y,v,v,v,m,r,v,!y,*x")
-	(match_operand:MMXMODE 1 "nonimm_or_0_operand"
-    "rCo,rC,C,rm,rC,C  ,!y,m  ,?!y,?!y,r  ,C,v,m,v,v,r,*x,!y"))]
+    "=r  ,o  ,r ,r ,m  ,?!y,!y,?!y,m  ,r  ,?!y,v ,v,v,m,r,v,!y,*x")
+	(match_operand:MMXMODE 1 "nonimm_or_vector_const0_or_m1_operand"
+    "rBXo,rBX,BX,rm,rBX,C  ,!y,m  ,?!y,?!y,r  ,BX,v,m,v,v,r,*x,!y"))]
   "(TARGET_MMX || TARGET_MMX_WITH_SSE)
    && !(MEM_P (operands[0]) && MEM_P (operands[1]))
    && ix86_hardreg_mov_ok (operands[0], operands[1])"
@@ -268,7 +268,10 @@ (define_insn "*mov<mode>_internal"
        (const_string "*")))
    (set (attr "mode")
      (cond [(eq_attr "alternative" "2")
-	      (const_string "SI")
+          (if_then_else
+            (match_test "operands[1] == CONST0_RTX (<MODE>mode)")
+            (const_string "SI")
+            (const_string "DI"))
 	    (eq_attr "alternative" "11,12")
 	      (cond [(match_test "<MODE>mode == V2SFmode
 				  || <MODE>mode == V4HFmode
@@ -313,7 +316,7 @@ (define_split
 
 (define_split
   [(set (match_operand:MMXMODE 0 "nonimmediate_gr_operand")
-	(match_operand:MMXMODE 1 "const0_operand"))]
+	(match_operand:MMXMODE 1 "vector_const0_or_m1_operand"))]
   "!TARGET_64BIT && reload_completed"
   [(const_int 0)]
   "ix86_split_long_move (operands); DONE;")
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index 3afaf83a7a0..95848ce3b46 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -1359,6 +1359,18 @@ (define_predicate "nonimm_or_0_operand"
   (ior (match_operand 0 "nonimmediate_operand")
        (match_operand 0 "const0_operand")))
 
+;; Match vector 0 or integer vector -1.
+(define_predicate "vector_const0_or_m1_operand"
+  (and (match_code "const_vector")
+       (match_test "op == CONST0_RTX (GET_MODE (op))
+                    || (INTEGRAL_MODE_P (GET_MODE (op))
+                        && op == CONSTM1_RTX (GET_MODE (op)))")))
+
+; Return true when OP is a nonimmediate, vector 0 or integer vector -1.
+(define_predicate "nonimm_or_vector_const0_or_m1_operand"
+  (ior (match_operand 0 "nonimmediate_operand")
+       (match_operand 0 "vector_const0_or_m1_operand")))
+
 ; Return true when OP is a nonimmediate or zero or all ones.
 (define_predicate "nonimm_or_0_or_1s_operand"
   (ior (match_operand 0 "nonimmediate_operand")
diff --git a/gcc/testsuite/gcc.target/i386/pr106022-2.c b/gcc/testsuite/gcc.target/i386/pr106022-2.c
index 0e79fb53297..2ec3c5fcda5 100644
--- a/gcc/testsuite/gcc.target/i386/pr106022-2.c
+++ b/gcc/testsuite/gcc.target/i386/pr106022-2.c
@@ -9,6 +9,6 @@ foo (int *c)
   c[1] = -1;
 }
 
-/* { dg-final { scan-assembler-times "movq\[ \\t\]+\[^\n\]*%xmm" 2 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "movl\[ \\t\]+\\\$-1," 2 { target ia32 } } } */
 /* { dg-final { scan-assembler-times "movq\[ \\t\]+\\\$-1," 1 { target { ! ia32 } } } } */
-/* { dg-final { scan-assembler-not "xmm" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-not "xmm" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr121015-1.c b/gcc/testsuite/gcc.target/i386/pr121015-1.c
new file mode 100644
index 00000000000..fefa5185be4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr121015-1.c
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64-v3" } */
+/* { dg-final { scan-assembler-not "\tmovl\[\\t \]+\\\$-1, %" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler "\tmovq\[\\t \]+\\\$-1, " { target { ! ia32 } } } } */
+
+extern union {
+  int i;
+  float f;
+} int_as_float_u;
+
+extern int render_result_from_bake_w;
+extern int render_result_from_bake_h_seed_pass;
+extern float *render_result_from_bake_h_primitive;
+extern float *render_result_from_bake_h_seed;
+
+float
+int_as_float(int i)
+{
+  int_as_float_u.i = i;
+  return int_as_float_u.f;
+}
+
+void
+render_result_from_bake_h(int tx)
+{
+  while (render_result_from_bake_w) {
+    for (; tx < render_result_from_bake_w; tx++)
+      render_result_from_bake_h_primitive[1] =
+          render_result_from_bake_h_primitive[2] = int_as_float(-1);
+    if (render_result_from_bake_h_seed_pass) {
+      *render_result_from_bake_h_seed = 0;
+    }
+  }
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr121015-2.c b/gcc/testsuite/gcc.target/i386/pr121015-2.c
new file mode 100644
index 00000000000..34df3599a5b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr121015-2.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64" } */
+
+void
+foo (int *c)
+{
+  c = __builtin_assume_aligned (c, 16);
+  c[0] = 0;
+  c[1] = 0;
+}
+
+/* { dg-final { scan-assembler-times "movl\[ \\t\]+\\\$0," 2 { target ia32 } } } */
+/* { dg-final { scan-assembler-times "movq\[ \\t\]+\\\$0," 1 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-not "xmm" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr121015-3.c b/gcc/testsuite/gcc.target/i386/pr121015-3.c
new file mode 100644
index 00000000000..44bf63c73e6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr121015-3.c
@@ -0,0 +1,35 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64" } */
+
+typedef enum { CPP_NUMBER } cpp_ttype;
+typedef struct {
+  bool unsignedp;
+  bool overflow;
+} cpp_num;
+extern cpp_num value, __trans_tmp_1;
+extern cpp_ttype eval_token_token_0;
+extern int eval_token_temp;
+static cpp_num
+eval_token(void)
+{
+  cpp_num __trans_tmp_2, result;
+  result.overflow = false;
+  switch (eval_token_token_0)
+    {
+    case CPP_NUMBER:
+      switch (eval_token_temp)
+	{
+	case 1:
+	  return __trans_tmp_1;
+	}
+      result.unsignedp = false;
+      __trans_tmp_2 = result;
+      return __trans_tmp_2;
+    }
+  return result;
+}
+void 
+_cpp_parse_expr_pfile(void)
+{
+  value = eval_token();
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr121015-4.c b/gcc/testsuite/gcc.target/i386/pr121015-4.c
new file mode 100644
index 00000000000..2848a946dd1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr121015-4.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc').  */
+/* { dg-final { check-function-bodies "**" "" "" { target { ! ia32 } } {^\t?\.}  } } */
+
+/*
+**zero:
+**.LFB0:
+**	.cfi_startproc
+**	xorps	%xmm0, %xmm0
+**	ret
+**...
+*/
+
+typedef float __v2sf __attribute__ ((__vector_size__ (8)));
+extern __v2sf f1;
+
+__v2sf
+zero (void)
+{
+  return __extension__(__v2sf){0, 0};
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr121015-5.c b/gcc/testsuite/gcc.target/i386/pr121015-5.c
new file mode 100644
index 00000000000..f736afc2eab
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr121015-5.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc').  */
+/* { dg-final { check-function-bodies "**" "" "" { target { ! ia32 } } {^\t?\.}  } } */
+
+/*
+**m1:
+**.LFB[0-9]+:
+**	.cfi_startproc
+**	pcmpeqd	%xmm0, %xmm0
+**	ret
+**...
+*/
+
+typedef char __v8qi __attribute__ ((__vector_size__ (8)));
+
+__v8qi
+m1 (void)
+{
+  return __extension__(__v8qi){-1, -1, -1, -1, -1, -1, -1, -1};
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr121015-6.c b/gcc/testsuite/gcc.target/i386/pr121015-6.c
new file mode 100644
index 00000000000..daebcb0acc5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr121015-6.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc').  */
+/* { dg-final { check-function-bodies "**" "" "" { target { ! ia32 } } {^\t?\.}  } } */
+
+/*
+**m1:
+**.LFB[0-9]+:
+**	.cfi_startproc
+**	pcmpeqd	%xmm0, %xmm0
+**	ret
+**...
+*/
+
+#include <x86intrin.h>
+
+__m128i
+m1 (void)
+{
+  __m64 x = _mm_set1_pi8 (-1);
+  __m128i y = _mm_set1_epi64 (x);
+  return y;
+}
-- 
2.50.0

Reply via email to