commit 77473a27bae04da99d6979d43e7bd0a8106f4557 Author: H.J. Lu <hjl.to...@gmail.com> Date: Thu Jun 26 06:08:51 2025 +0800
x86: Also handle all 1s float vector constant replaces (insn 29 28 30 5 (set (reg:V2SF 107) (mem/u/c:V2SF (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64])) 2031 {*movv2sf_internal} (expr_list:REG_EQUAL (const_vector:V2SF [ (const_double:SF -QNaN [-QNaN]) repeated x2 ]) (nil))) with (insn 98 13 14 3 (set (reg:V8QI 112) (const_vector:V8QI [ (const_int -1 [0xffffffffffffffff]) repeated x8 ])) -1 (nil)) ... (insn 29 28 30 5 (set (reg:V2SF 107) (subreg:V2SF (reg:V8QI 112) 0)) 2031 {*movv2sf_internal} (expr_list:REG_EQUAL (const_vector:V2SF [ (const_double:SF -QNaN [-QNaN]) repeated x2 ]) (nil))) which leads to pr121015.c: In function ‘render_result_from_bake_h’: pr121015.c:34:1: error: unrecognizable insn: 34 | } | ^ (insn 98 13 14 3 (set (reg:V8QI 112) (const_vector:V8QI [ (const_int -1 [0xffffffffffffffff]) repeated x8 ])) -1 (expr_list:REG_EQUIV (const_vector:V8QI [ (const_int -1 [0xffffffffffffffff]) repeated x8 ]) (nil))) during RTL pass: ira 1. Update constm1_operand to also return true for integer and float all 1s vectors. 2. Add nonimm_or_0_or_m1_operand for nonimmediate, zero or -1 operand. 3. Add BI for constant all 0s/1s operand. 4. Update "*mov<mode>_internal" in mmx.md to handle integer all 1s vectors. 5. Update MMXMODE move splitter to also split all 1s source operand. gcc/ PR target/121015 * config/i386/constraints.md (BI): New constraint. * config/i386/i386.cc (ix86_print_operand): Support CONSTM1_RTX. * config/i386/mmx.md (*mov<mode>_internal): Replace C with BI for memory and integer register destinations. Update MMXMODE move splitter to also split all 1s source operand. * config/i386/predicates.md (constm1_operand): Also return true for int_float_vector_all_ones_operand. (nonimm_or_0_or_m1_operand): New predicate. gcc/testsuite/ PR target/121015 * gcc.target/i386/pr106022-2.c: Adjusted. * gcc.target/i386/pr121015.c: New test. OK for master? -- H.J.
From a9846fdd5e8e43c60fa8ea8e78a2cb72da7a12b9 Mon Sep 17 00:00:00 2001 From: "H.J. Lu" <hjl.to...@gmail.com> Date: Thu, 10 Jul 2025 06:21:58 +0800 Subject: [PATCH] x86: Update "*mov<mode>_internal" in mmx.md to handle all 1s vectors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 77473a27bae04da99d6979d43e7bd0a8106f4557 Author: H.J. Lu <hjl.to...@gmail.com> Date: Thu Jun 26 06:08:51 2025 +0800 x86: Also handle all 1s float vector constant replaces (insn 29 28 30 5 (set (reg:V2SF 107) (mem/u/c:V2SF (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64])) 2031 {*movv2sf_internal} (expr_list:REG_EQUAL (const_vector:V2SF [ (const_double:SF -QNaN [-QNaN]) repeated x2 ]) (nil))) with (insn 98 13 14 3 (set (reg:V8QI 112) (const_vector:V8QI [ (const_int -1 [0xffffffffffffffff]) repeated x8 ])) -1 (nil)) ... (insn 29 28 30 5 (set (reg:V2SF 107) (subreg:V2SF (reg:V8QI 112) 0)) 2031 {*movv2sf_internal} (expr_list:REG_EQUAL (const_vector:V2SF [ (const_double:SF -QNaN [-QNaN]) repeated x2 ]) (nil))) which leads to pr121015.c: In function ‘render_result_from_bake_h’: pr121015.c:34:1: error: unrecognizable insn: 34 | } | ^ (insn 98 13 14 3 (set (reg:V8QI 112) (const_vector:V8QI [ (const_int -1 [0xffffffffffffffff]) repeated x8 ])) -1 (expr_list:REG_EQUIV (const_vector:V8QI [ (const_int -1 [0xffffffffffffffff]) repeated x8 ]) (nil))) during RTL pass: ira 1. Update constm1_operand to also return true for integer and float all 1s vectors. 2. Add nonimm_or_0_or_m1_operand for nonimmediate, zero or -1 operand. 3. Add BI for constant all 0s/1s operand. 4. Update "*mov<mode>_internal" in mmx.md to handle integer all 1s vectors. 5. Update MMXMODE move splitter to also split all 1s source operand. gcc/ PR target/121015 * config/i386/constraints.md (BI): New constraint. * config/i386/i386.cc (ix86_print_operand): Support CONSTM1_RTX. * config/i386/mmx.md (*mov<mode>_internal): Replace C with BI for memory and integer register destinations.
Update MMXMODE move splitter to also split all 1s source operand. * config/i386/predicates.md (constm1_operand): Also return true for int_float_vector_all_ones_operand. (nonimm_or_0_or_m1_operand): New predicate. gcc/testsuite/ PR target/121015 * gcc.target/i386/pr106022-2.c: Adjusted. * gcc.target/i386/pr121015.c: New test. Signed-off-by: H.J. Lu <hjl.to...@gmail.com> --- gcc/config/i386/constraints.md | 5 ++++ gcc/config/i386/i386.cc | 11 +++++-- gcc/config/i386/mmx.md | 13 +++++---- gcc/config/i386/predicates.md | 26 ++++++++++------- gcc/testsuite/gcc.target/i386/pr106022-2.c | 4 +-- gcc/testsuite/gcc.target/i386/pr121015.c | 34 ++++++++++++++++++++++ 6 files changed, 73 insertions(+), 20 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr121015.c diff --git a/gcc/config/i386/constraints.md b/gcc/config/i386/constraints.md index 38877a7e61b..b436893bce4 100644 --- a/gcc/config/i386/constraints.md +++ b/gcc/config/i386/constraints.md @@ -173,6 +173,7 @@ (define_register_constraint "YW" ;; H Integer SSE constant that is 128/256bit all ones ;; and zero-extand to 256/512bit, or 128bit all ones ;; and zero-extend to 512bit. +;; I Integer vector constant with all 0s/1s operand. ;; M x86-64 memory operand. (define_constraint "Bf" @@ -237,6 +238,10 @@ (define_constraint "BH" (ior (match_operand 0 "vector_all_ones_zero_extend_half_operand") (match_operand 0 "vector_all_ones_zero_extend_quarter_operand"))) +(define_constraint "BI" + "@internal constant all 0s/1s operand." + (match_operand 0 "const0_or_m1_operand")) + ;; NB: Similar to 'm', but don't use define_memory_constraint on x86-64 ;; to prevent LRA from converting the operand to the form '(mem (reg X))' ;; where X is a base register. 
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index ad7360ec71a..8d5e9dab8e8 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -14671,9 +14671,14 @@ ix86_print_operand (FILE *file, rtx x, int code) since we can in fact encode that into an immediate. */ if (GET_CODE (x) == CONST_VECTOR) { - if (x != CONST0_RTX (GET_MODE (x))) - output_operand_lossage ("invalid vector immediate"); - x = const0_rtx; + if (x == CONSTM1_RTX (GET_MODE (x))) + x = constm1_rtx; + else + { + if (x != CONST0_RTX (GET_MODE (x))) + output_operand_lossage ("invalid vector immediate"); + x = const0_rtx; + } } if (code == 'P') diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index 79202323e53..017784c436e 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -183,9 +183,9 @@ (define_expand "mov<mode>" (define_insn "*mov<mode>_internal" [(set (match_operand:MMXMODE 0 "nonimmediate_operand" - "=r ,o ,r,r ,m ,?!y,!y,?!y,m ,r ,?!y,v,v,v,m,r,v,!y,*x") - (match_operand:MMXMODE 1 "nonimm_or_0_operand" - "rCo,rC,C,rm,rC,C ,!y,m ,?!y,?!y,r ,C,v,m,v,v,r,*x,!y"))] + "=r ,o ,r ,r ,m ,?!y,!y,?!y,m ,r ,?!y,v,v,v,m,r,v,!y,*x") + (match_operand:MMXMODE 1 "nonimm_or_0_or_m1_operand" + "rBIo,rBI,BI,rm,rBI,C ,!y,m ,?!y,?!y,r ,C,v,m,v,v,r,*x,!y"))] "(TARGET_MMX || TARGET_MMX_WITH_SSE) && !(MEM_P (operands[0]) && MEM_P (operands[1])) && ix86_hardreg_mov_ok (operands[0], operands[1])" @@ -268,7 +268,10 @@ (define_insn "*mov<mode>_internal" (const_string "*"))) (set (attr "mode") (cond [(eq_attr "alternative" "2") - (const_string "SI") + (if_then_else + (match_test "const0_operand (operands[1], <MODE>mode)") + (const_string "SI") + (const_string "DI")) (eq_attr "alternative" "11,12") (cond [(match_test "<MODE>mode == V2SFmode || <MODE>mode == V4HFmode @@ -313,7 +316,7 @@ (define_split (define_split [(set (match_operand:MMXMODE 0 "nonimmediate_gr_operand") - (match_operand:MMXMODE 1 "const0_operand"))] + (match_operand:MMXMODE 1 "const0_or_m1_operand"))] 
"!TARGET_64BIT && reload_completed" [(const_int 0)] "ix86_split_long_move (operands); DONE;") diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md index 3afaf83a7a0..7def9b89b48 100644 --- a/gcc/config/i386/predicates.md +++ b/gcc/config/i386/predicates.md @@ -833,16 +833,6 @@ (define_predicate "const1_operand" return op == CONST1_RTX (mode); }) -;; Match exactly -1. -(define_predicate "constm1_operand" - (and (match_code "const_int") - (match_test "op == constm1_rtx"))) - -;; Match 0 or -1. -(define_predicate "const0_or_m1_operand" - (ior (match_operand 0 "const0_operand") - (match_operand 0 "constm1_operand"))) - ;; Match exactly eight. (define_predicate "const8_operand" (and (match_code "const_int") @@ -1218,6 +1208,17 @@ (define_predicate "int_float_vector_all_ones_operand" (match_operand 0 "float_vector_all_ones_operand") (match_test "op == constm1_rtx"))) +;; Match exactly -1. +(define_predicate "constm1_operand" + (ior (and (match_code "const_int") + (match_test "op == constm1_rtx")) + (match_operand 0 "int_float_vector_all_ones_operand"))) + +;; Match 0 or -1. +(define_predicate "const0_or_m1_operand" + (ior (match_operand 0 "const0_operand") + (match_operand 0 "constm1_operand"))) + /* Return true if operand is an 128/256bit all ones vector that zero-extends to 256/512bit. */ (define_predicate "vector_all_ones_zero_extend_half_operand" @@ -1359,6 +1360,11 @@ (define_predicate "nonimm_or_0_operand" (ior (match_operand 0 "nonimmediate_operand") (match_operand 0 "const0_operand"))) +; Return true when OP is a nonimmediate, zero or -1. +(define_predicate "nonimm_or_0_or_m1_operand" + (ior (match_operand 0 "nonimmediate_operand") + (match_operand 0 "const0_or_m1_operand"))) + ; Return true when OP is a nonimmediate or zero or all ones. 
(define_predicate "nonimm_or_0_or_1s_operand" (ior (match_operand 0 "nonimmediate_operand") diff --git a/gcc/testsuite/gcc.target/i386/pr106022-2.c b/gcc/testsuite/gcc.target/i386/pr106022-2.c index 0e79fb53297..2ec3c5fcda5 100644 --- a/gcc/testsuite/gcc.target/i386/pr106022-2.c +++ b/gcc/testsuite/gcc.target/i386/pr106022-2.c @@ -9,6 +9,6 @@ foo (int *c) c[1] = -1; } -/* { dg-final { scan-assembler-times "movq\[ \\t\]+\[^\n\]*%xmm" 2 { target { ia32 } } } } */ +/* { dg-final { scan-assembler-times "movl\[ \\t\]+\\\$-1," 2 { target ia32 } } } */ /* { dg-final { scan-assembler-times "movq\[ \\t\]+\\\$-1," 1 { target { ! ia32 } } } } */ -/* { dg-final { scan-assembler-not "xmm" { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler-not "xmm" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr121015.c b/gcc/testsuite/gcc.target/i386/pr121015.c new file mode 100644 index 00000000000..5ae316e2a68 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr121015.c @@ -0,0 +1,34 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=x86-64-v3" } */ +/* { dg-final { scan-assembler-not "\tmovl\[\\t \]+\\\$-1, %" { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler-times "\tmovq\[\\t \]+\\\$-1, " 2 { target { ! ia32 } } } } */ + +extern union { + int i; + float f; +} int_as_float_u; + +extern int render_result_from_bake_w; +extern int render_result_from_bake_h_seed_pass; +extern float *render_result_from_bake_h_primitive; +extern float *render_result_from_bake_h_seed; + +float +int_as_float(int i) +{ + int_as_float_u.i = i; + return int_as_float_u.f; +} + +void +render_result_from_bake_h(int tx) +{ + while (render_result_from_bake_w) { + for (; tx < render_result_from_bake_w; tx++) + render_result_from_bake_h_primitive[1] = + render_result_from_bake_h_primitive[2] = int_as_float(-1); + if (render_result_from_bake_h_seed_pass) { + *render_result_from_bake_h_seed = 0; + } + } +} -- 2.50.0