commit 77473a27bae04da99d6979d43e7bd0a8106f4557
Author: H.J. Lu <hjl.to...@gmail.com>
Date:   Thu Jun 26 06:08:51 2025 +0800

    x86: Also handle all 1s float vector constant

replaces

(insn 29 28 30 5 (set (reg:V2SF 107)
        (mem/u/c:V2SF (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64])) 2031 {*movv2sf_internal}
     (expr_list:REG_EQUAL (const_vector:V2SF [
                (const_double:SF -QNaN [-QNaN]) repeated x2
            ])
        (nil)))

with

(insn 98 13 14 3 (set (reg:V8QI 112)
        (const_vector:V8QI [
                (const_int -1 [0xffffffffffffffff]) repeated x8
            ])) -1
     (nil))
...
(insn 29 28 30 5 (set (reg:V2SF 107)
        (subreg:V2SF (reg:V8QI 112) 0)) 2031 {*movv2sf_internal}
     (expr_list:REG_EQUAL (const_vector:V2SF [
                (const_double:SF -QNaN [-QNaN]) repeated x2
            ])
        (nil)))

which leads to

pr121015.c: In function ‘render_result_from_bake_h’:
pr121015.c:34:1: error: unrecognizable insn:
   34 | }
      | ^
(insn 98 13 14 3 (set (reg:V8QI 112)
        (const_vector:V8QI [
                (const_int -1 [0xffffffffffffffff]) repeated x8
            ])) -1
     (expr_list:REG_EQUIV (const_vector:V8QI [
                (const_int -1 [0xffffffffffffffff]) repeated x8
            ])
        (nil)))
during RTL pass: ira

1. Add vector_const0_or_m1_operand for vector 0 or integer vector -1.
2. Add nonimm_or_vector_const0_or_m1_operand for nonimmediate, vector 0
or integer vector -1 operand.
3. Add BX constraint for MMX vector constant all 0s/1s operand.
4. Update MMXMODE:*mov<mode>_internal to support integer all 1s vectors.
Replace <v,C> with <v,BX> to generate

        pcmpeqd %xmm0, %xmm0

for

(set (reg/i:V8QI 20 xmm0)
     (const_vector:V8QI [(const_int -1 [0xffffffffffffffff]) repeated x8]))

NB: The upper 64 bits in XMM0 are all 1s, instead of all 0s.

5. Update 32-bit MMXMODE move splitter to also split all 1s vector source
operand.

gcc/

	PR target/121015
	* config/i386/constraints.md (BX): New constraint.
	* config/i386/i386.cc (ix86_print_operand): Support CONSTM1_RTX.
	* config/i386/mmx.md (MMXMODE:*mov<mode>_internal): Replace C with
	BX for memory and integer register destination.  Replace <v,C>
	with <v,BX>.  Update 32-bit MMXMODE move splitter to also split
	all 1s vector source operand.
	* config/i386/predicates.md (vector_const0_or_m1_operand): New predicate.
	(nonimm_or_vector_const0_or_m1_operand): Likewise.

gcc/testsuite/

	PR target/121015
	* gcc.target/i386/pr106022-2.c: Adjusted.
	* gcc.target/i386/pr121015-1.c: New test.
	* gcc.target/i386/pr121015-2.c: Likewise.
	* gcc.target/i386/pr121015-3.c: Likewise.
	* gcc.target/i386/pr121015-4.c: Likewise.
	* gcc.target/i386/pr121015-5.c: Likewise.
	* gcc.target/i386/pr121015-6.c: Likewise.

OK for master?

Thanks.

--
H.J.
From a6e90a205d69f0a192fb885a96bbbe0f8e7c7819 Mon Sep 17 00:00:00 2001 From: "H.J. Lu" <hjl.to...@gmail.com> Date: Thu, 10 Jul 2025 06:21:58 +0800 Subject: [PATCH v2] x86: Update MMXMODE:*mov<mode>_internal to support all 1s vectors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 77473a27bae04da99d6979d43e7bd0a8106f4557 Author: H.J. Lu <hjl.to...@gmail.com> Date: Thu Jun 26 06:08:51 2025 +0800 x86: Also handle all 1s float vector constant replaces (insn 29 28 30 5 (set (reg:V2SF 107) (mem/u/c:V2SF (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64])) 2031 {*movv2sf_internal} (expr_list:REG_EQUAL (const_vector:V2SF [ (const_double:SF -QNaN [-QNaN]) repeated x2 ]) (nil))) with (insn 98 13 14 3 (set (reg:V8QI 112) (const_vector:V8QI [ (const_int -1 [0xffffffffffffffff]) repeated x8 ])) -1 (nil)) ... (insn 29 28 30 5 (set (reg:V2SF 107) (subreg:V2SF (reg:V8QI 112) 0)) 2031 {*movv2sf_internal} (expr_list:REG_EQUAL (const_vector:V2SF [ (const_double:SF -QNaN [-QNaN]) repeated x2 ]) (nil))) which leads to pr121015.c: In function ‘render_result_from_bake_h’: pr121015.c:34:1: error: unrecognizable insn: 34 | } | ^ (insn 98 13 14 3 (set (reg:V8QI 112) (const_vector:V8QI [ (const_int -1 [0xffffffffffffffff]) repeated x8 ])) -1 (expr_list:REG_EQUIV (const_vector:V8QI [ (const_int -1 [0xffffffffffffffff]) repeated x8 ]) (nil))) during RTL pass: ira 1. Add vector_const0_or_m1_operand for vector 0 or integer vector -1. 2. Add nonimm_or_vector_const0_or_m1_operand for nonimmediate, vector 0 or integer vector -1 operand. 3. Add BX constraint for MMX vector constant all 0s/1s operand. 4. Update MMXMODE:*mov<mode>_internal to support integer all 1s vectors. Replace <v,C> with <v,BX> to generate pcmpeqd %xmm0, %xmm0 for (set (reg/i:V8QI 20 xmm0) (const_vector:V8QI [(const_int -1 [0xffffffffffffffff]) repeated x8])) NB: The upper 64 bits in XMM0 are all 1s, instead of all 0s. 5. 
Update 32-bit MMXMODE move splitter to also split all 1s vector source operand. gcc/ PR target/121015 * config/i386/constraints.md (BX): New constraint. * config/i386/i386.cc (ix86_print_operand): Support CONSTM1_RTX. * config/i386/mmx.md (MMXMODE:*mov<mode>_internal): Replace C with BX for memory and integer register destination. Replace <v,C> with <v,BX>. Update 32-bit MMXMODE move splitter to also split all 1s vector source operand. * config/i386/predicates.md (vector_const0_or_m1_operand): New predicate. (nonimm_or_vector_const0_or_m1_operand): Likewise. gcc/testsuite/ PR target/121015 * gcc.target/i386/pr106022-2.c: Adjusted. * gcc.target/i386/pr121015-1.c: New test. * gcc.target/i386/pr121015-2.c: Likewise. * gcc.target/i386/pr121015-3.c: Likewise. * gcc.target/i386/pr121015-4.c: Likewise. * gcc.target/i386/pr121015-5.c: Likewise. * gcc.target/i386/pr121015-6.c: Likewise. Signed-off-by: H.J. Lu <hjl.to...@gmail.com> --- gcc/config/i386/constraints.md | 6 ++++ gcc/config/i386/i386.cc | 11 +++++-- gcc/config/i386/mmx.md | 13 ++++---- gcc/config/i386/predicates.md | 12 ++++++++ gcc/testsuite/gcc.target/i386/pr106022-2.c | 4 +-- gcc/testsuite/gcc.target/i386/pr121015-1.c | 34 +++++++++++++++++++++ gcc/testsuite/gcc.target/i386/pr121015-2.c | 14 +++++++++ gcc/testsuite/gcc.target/i386/pr121015-3.c | 35 ++++++++++++++++++++++ gcc/testsuite/gcc.target/i386/pr121015-4.c | 22 ++++++++++++++ gcc/testsuite/gcc.target/i386/pr121015-5.c | 21 +++++++++++++ gcc/testsuite/gcc.target/i386/pr121015-6.c | 23 ++++++++++++++ 11 files changed, 185 insertions(+), 10 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr121015-1.c create mode 100644 gcc/testsuite/gcc.target/i386/pr121015-2.c create mode 100644 gcc/testsuite/gcc.target/i386/pr121015-3.c create mode 100644 gcc/testsuite/gcc.target/i386/pr121015-4.c create mode 100644 gcc/testsuite/gcc.target/i386/pr121015-5.c create mode 100644 gcc/testsuite/gcc.target/i386/pr121015-6.c diff --git 
a/gcc/config/i386/constraints.md b/gcc/config/i386/constraints.md index 38877a7e61b..0d563298857 100644 --- a/gcc/config/i386/constraints.md +++ b/gcc/config/i386/constraints.md @@ -174,6 +174,7 @@ (define_register_constraint "YW" ;; and zero-extand to 256/512bit, or 128bit all ones ;; and zero-extend to 512bit. ;; M x86-64 memory operand. +;; X MMX vector constant with all 0s/1s operand. (define_constraint "Bf" "@internal Flags register operand." @@ -246,6 +247,11 @@ (define_constraint "BM" (match_test "memory_address_addr_space_p (GET_MODE (op), XEXP (op, 0), MEM_ADDR_SPACE (op))"))) +(define_constraint "BX" + "@internal MMX vector constant all 0s/1s operand." + (and (match_test "TARGET_MMX || TARGET_MMX_WITH_SSE") + (match_operand 0 "vector_const0_or_m1_operand"))) + ;; Integer constant constraints. (define_constraint "Wb" "Integer constant in the range 0 @dots{} 7, for 8-bit shifts." diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index ad7360ec71a..8d5e9dab8e8 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -14671,9 +14671,14 @@ ix86_print_operand (FILE *file, rtx x, int code) since we can in fact encode that into an immediate. 
*/ if (GET_CODE (x) == CONST_VECTOR) { - if (x != CONST0_RTX (GET_MODE (x))) - output_operand_lossage ("invalid vector immediate"); - x = const0_rtx; + if (x == CONSTM1_RTX (GET_MODE (x))) + x = constm1_rtx; + else + { + if (x != CONST0_RTX (GET_MODE (x))) + output_operand_lossage ("invalid vector immediate"); + x = const0_rtx; + } } if (code == 'P') diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index 79202323e53..1dab0d77af3 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -183,9 +183,9 @@ (define_expand "mov<mode>" (define_insn "*mov<mode>_internal" [(set (match_operand:MMXMODE 0 "nonimmediate_operand" - "=r ,o ,r,r ,m ,?!y,!y,?!y,m ,r ,?!y,v,v,v,m,r,v,!y,*x") - (match_operand:MMXMODE 1 "nonimm_or_0_operand" - "rCo,rC,C,rm,rC,C ,!y,m ,?!y,?!y,r ,C,v,m,v,v,r,*x,!y"))] + "=r ,o ,r ,r ,m ,?!y,!y,?!y,m ,r ,?!y,v ,v,v,m,r,v,!y,*x") + (match_operand:MMXMODE 1 "nonimm_or_vector_const0_or_m1_operand" + "rBXo,rBX,BX,rm,rBX,C ,!y,m ,?!y,?!y,r ,BX,v,m,v,v,r,*x,!y"))] "(TARGET_MMX || TARGET_MMX_WITH_SSE) && !(MEM_P (operands[0]) && MEM_P (operands[1])) && ix86_hardreg_mov_ok (operands[0], operands[1])" @@ -268,7 +268,10 @@ (define_insn "*mov<mode>_internal" (const_string "*"))) (set (attr "mode") (cond [(eq_attr "alternative" "2") - (const_string "SI") + (if_then_else + (match_test "operands[1] == CONST0_RTX (<MODE>mode)") + (const_string "SI") + (const_string "DI")) (eq_attr "alternative" "11,12") (cond [(match_test "<MODE>mode == V2SFmode || <MODE>mode == V4HFmode @@ -313,7 +316,7 @@ (define_split (define_split [(set (match_operand:MMXMODE 0 "nonimmediate_gr_operand") - (match_operand:MMXMODE 1 "const0_operand"))] + (match_operand:MMXMODE 1 "vector_const0_or_m1_operand"))] "!TARGET_64BIT && reload_completed" [(const_int 0)] "ix86_split_long_move (operands); DONE;") diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md index 3afaf83a7a0..95848ce3b46 100644 --- a/gcc/config/i386/predicates.md +++ 
b/gcc/config/i386/predicates.md @@ -1359,6 +1359,18 @@ (define_predicate "nonimm_or_0_operand" (ior (match_operand 0 "nonimmediate_operand") (match_operand 0 "const0_operand"))) +;; Match vector 0 or integer vector -1. +(define_predicate "vector_const0_or_m1_operand" + (and (match_code "const_vector") + (match_test "op == CONST0_RTX (GET_MODE (op)) + || (INTEGRAL_MODE_P (GET_MODE (op)) + && op == CONSTM1_RTX (GET_MODE (op)))"))) + +; Return true when OP is a nonimmediate, vector 0 or integer vector -1. +(define_predicate "nonimm_or_vector_const0_or_m1_operand" + (ior (match_operand 0 "nonimmediate_operand") + (match_operand 0 "vector_const0_or_m1_operand"))) + ; Return true when OP is a nonimmediate or zero or all ones. (define_predicate "nonimm_or_0_or_1s_operand" (ior (match_operand 0 "nonimmediate_operand") diff --git a/gcc/testsuite/gcc.target/i386/pr106022-2.c b/gcc/testsuite/gcc.target/i386/pr106022-2.c index 0e79fb53297..2ec3c5fcda5 100644 --- a/gcc/testsuite/gcc.target/i386/pr106022-2.c +++ b/gcc/testsuite/gcc.target/i386/pr106022-2.c @@ -9,6 +9,6 @@ foo (int *c) c[1] = -1; } -/* { dg-final { scan-assembler-times "movq\[ \\t\]+\[^\n\]*%xmm" 2 { target { ia32 } } } } */ +/* { dg-final { scan-assembler-times "movl\[ \\t\]+\\\$-1," 2 { target ia32 } } } */ /* { dg-final { scan-assembler-times "movq\[ \\t\]+\\\$-1," 1 { target { ! ia32 } } } } */ -/* { dg-final { scan-assembler-not "xmm" { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler-not "xmm" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr121015-1.c b/gcc/testsuite/gcc.target/i386/pr121015-1.c new file mode 100644 index 00000000000..fefa5185be4 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr121015-1.c @@ -0,0 +1,34 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=x86-64-v3" } */ +/* { dg-final { scan-assembler-not "\tmovl\[\\t \]+\\\$-1, %" { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler "\tmovq\[\\t \]+\\\$-1, " { target { ! 
ia32 } } } } */ + +extern union { + int i; + float f; +} int_as_float_u; + +extern int render_result_from_bake_w; +extern int render_result_from_bake_h_seed_pass; +extern float *render_result_from_bake_h_primitive; +extern float *render_result_from_bake_h_seed; + +float +int_as_float(int i) +{ + int_as_float_u.i = i; + return int_as_float_u.f; +} + +void +render_result_from_bake_h(int tx) +{ + while (render_result_from_bake_w) { + for (; tx < render_result_from_bake_w; tx++) + render_result_from_bake_h_primitive[1] = + render_result_from_bake_h_primitive[2] = int_as_float(-1); + if (render_result_from_bake_h_seed_pass) { + *render_result_from_bake_h_seed = 0; + } + } +} diff --git a/gcc/testsuite/gcc.target/i386/pr121015-2.c b/gcc/testsuite/gcc.target/i386/pr121015-2.c new file mode 100644 index 00000000000..34df3599a5b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr121015-2.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=x86-64" } */ + +void +foo (int *c) +{ + c = __builtin_assume_aligned (c, 16); + c[0] = 0; + c[1] = 0; +} + +/* { dg-final { scan-assembler-times "movl\[ \\t\]+\\\$0," 2 { target ia32 } } } */ +/* { dg-final { scan-assembler-times "movq\[ \\t\]+\\\$0," 1 { target { ! 
ia32 } } } } */ +/* { dg-final { scan-assembler-not "xmm" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr121015-3.c b/gcc/testsuite/gcc.target/i386/pr121015-3.c new file mode 100644 index 00000000000..44bf63c73e6 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr121015-3.c @@ -0,0 +1,35 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=x86-64" } */ + +typedef enum { CPP_NUMBER } cpp_ttype; +typedef struct { + bool unsignedp; + bool overflow; +} cpp_num; +extern cpp_num value, __trans_tmp_1; +extern cpp_ttype eval_token_token_0; +extern int eval_token_temp; +static cpp_num +eval_token(void) +{ + cpp_num __trans_tmp_2, result; + result.overflow = false; + switch (eval_token_token_0) + { + case CPP_NUMBER: + switch (eval_token_temp) + { + case 1: + return __trans_tmp_1; + } + result.unsignedp = false; + __trans_tmp_2 = result; + return __trans_tmp_2; + } + return result; +} +void +_cpp_parse_expr_pfile(void) +{ + value = eval_token(); +} diff --git a/gcc/testsuite/gcc.target/i386/pr121015-4.c b/gcc/testsuite/gcc.target/i386/pr121015-4.c new file mode 100644 index 00000000000..2848a946dd1 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr121015-4.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=x86-64" } */ +/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */ +/* { dg-final { check-function-bodies "**" "" "" { target { ! ia32 } } {^\t?\.} } } */ + +/* +**zero: +**.LFB0: +** .cfi_startproc +** xorps %xmm0, %xmm0 +** ret +**... 
+*/ + +typedef float __v2sf __attribute__ ((__vector_size__ (8))); +extern __v2sf f1; + +__v2sf +zero (void) +{ + return __extension__(__v2sf){0, 0}; +} diff --git a/gcc/testsuite/gcc.target/i386/pr121015-5.c b/gcc/testsuite/gcc.target/i386/pr121015-5.c new file mode 100644 index 00000000000..f736afc2eab --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr121015-5.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=x86-64" } */ +/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */ +/* { dg-final { check-function-bodies "**" "" "" { target { ! ia32 } } {^\t?\.} } } */ + +/* +**m1: +**.LFB[0-9]+: +** .cfi_startproc +** pcmpeqd %xmm0, %xmm0 +** ret +**... +*/ + +typedef char __v8qi __attribute__ ((__vector_size__ (8))); + +__v8qi +m1 (void) +{ + return __extension__(__v8qi){-1, -1, -1, -1, -1, -1, -1, -1}; +} diff --git a/gcc/testsuite/gcc.target/i386/pr121015-6.c b/gcc/testsuite/gcc.target/i386/pr121015-6.c new file mode 100644 index 00000000000..daebcb0acc5 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr121015-6.c @@ -0,0 +1,23 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=x86-64" } */ +/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */ +/* { dg-final { check-function-bodies "**" "" "" { target { ! ia32 } } {^\t?\.} } } */ + +/* +**m1: +**.LFB[0-9]+: +** .cfi_startproc +** pcmpeqd %xmm0, %xmm0 +** ret +**... +*/ + +#include <x86intrin.h> + +__m128i +m1 (void) +{ + __m64 x = _mm_set1_pi8 (-1); + __m128i y = _mm_set1_epi64 (x); + return y; +} -- 2.50.0