https://gcc.gnu.org/g:fefc96d7b285fd541696d1a43223612d282bc197
commit r16-5669-gfefc96d7b285fd541696d1a43223612d282bc197 Author: Robin Dapp <[email protected]> Date: Wed Nov 26 10:27:24 2025 +0100 forwprop: Nop-convert operands if necessary [PR122855]. This fixes up r16-5561-g283eb27d5f674b where I allowed nop conversions for the input operands. There are several paths through the function that still require an explicit nop conversion for them. This patch adds them. PR tree-optimization/122855 PR tree-optimization/122850 gcc/ChangeLog: * tree-ssa-forwprop.cc (simplify_vector_constructor): Nop convert input if necessary. gcc/testsuite/ChangeLog: * gcc.dg/vect/pr122850.c: New test. * gcc.dg/vect/pr122855.c: New test. Diff: --- gcc/testsuite/gcc.dg/vect/pr122850.c | 13 ++++++++++ gcc/testsuite/gcc.dg/vect/pr122855.c | 15 ++++++++++++ gcc/tree-ssa-forwprop.cc | 47 ++++++++++++++++++++++++++++++------ 3 files changed, 67 insertions(+), 8 deletions(-) diff --git a/gcc/testsuite/gcc.dg/vect/pr122850.c b/gcc/testsuite/gcc.dg/vect/pr122850.c new file mode 100644 index 000000000000..4f50aa9660dc --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr122850.c @@ -0,0 +1,13 @@ +/* { dg-do compile { target { x86_64-*-* i?86-*-* } } } */ +/* { dg-additional-options "-O3 -march=haswell -m32" } */ + +typedef int v2ll __attribute__ ((__vector_size__ (2 * sizeof (int)))); +typedef unsigned int v2ull __attribute__ ((__vector_size__ (2 * sizeof (int)))); +typedef __attribute__ ((__vector_size__ (2 * sizeof (short)))) short v2s; + +v2ll +f (v2ull e) +{ + v2s c = (v2s) e[0]; + return (v2ll) {(int) c, 0}; +} diff --git a/gcc/testsuite/gcc.dg/vect/pr122855.c b/gcc/testsuite/gcc.dg/vect/pr122855.c new file mode 100644 index 000000000000..3084d2062a17 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr122855.c @@ -0,0 +1,15 @@ +/* { dg-do compile { target { x86_64-*-* i?86-*-* } } } */ +/* { dg-additional-options "-O3 -march=haswell" } */ + +int zoom_x3_weights_0, zoom_x3_j, zoom_x3_pixel2; + +void zoom_x3(char *__restrict s, char *__restrict zoom_x3_tmp) { + int pixel0 = 0, pixel1 = 0; + for (; zoom_x3_j; zoom_x3_j--) { + pixel0 += *s++ * zoom_x3_weights_0; + pixel1 += *s++ * zoom_x3_weights_0; + zoom_x3_pixel2 += *s++ * zoom_x3_weights_0; + } + *zoom_x3_tmp++ = pixel0 < 0 ? 0 : pixel0 > 255 ? 255 : pixel0; + *zoom_x3_tmp = pixel1 < 0 ? 0 : pixel1 > 255 ? 255 : pixel1; +} diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc index 00140ce950c3..2200fc04918d 100644 --- a/gcc/tree-ssa-forwprop.cc +++ b/gcc/tree-ssa-forwprop.cc @@ -4183,24 +4183,45 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi) /* ??? We can see if we can safely convert to the original element type. */ converted_orig1 = conv_code != ERROR_MARK; + tree target_type = converted_orig1 ? type : perm_type; + tree nonconstant_for_splat = one_nonconstant; + /* If there's a nop conversion between the target element type and + the nonconstant's type, convert it. */ + if (!useless_type_conversion_p (TREE_TYPE (target_type), + TREE_TYPE (one_nonconstant))) + nonconstant_for_splat + = gimple_build (&stmts, NOP_EXPR, TREE_TYPE (target_type), + one_nonconstant); orig[1] = gimple_build_vector_from_val (&stmts, UNKNOWN_LOCATION, - converted_orig1 - ? type : perm_type, - one_nonconstant); + target_type, + nonconstant_for_splat); } else if (orig[1] == error_mark_node) { /* ??? See if we can convert the vector to the original type. */ converted_orig1 = conv_code != ERROR_MARK; unsigned n = converted_orig1 ? nelts : refnelts; - tree_vector_builder vec (converted_orig1 - ? type : perm_type, n, 1); + tree target_type = converted_orig1 ? type : perm_type; + tree_vector_builder vec (target_type, n, 1); for (unsigned i = 0; i < n; ++i) if (i < nelts && constants[i]) - vec.quick_push (constants[i]); + { + tree constant = constants[i]; + /* If there's a nop conversion, convert the constant. */ + if (!useless_type_conversion_p (TREE_TYPE (target_type), + TREE_TYPE (constant))) + constant = fold_convert (TREE_TYPE (target_type), constant); + vec.quick_push (constant); + } else - /* ??? Push a don't-care value. */ - vec.quick_push (one_constant); + { + /* ??? Push a don't-care value. */ + tree constant = one_constant; + if (!useless_type_conversion_p (TREE_TYPE (target_type), + TREE_TYPE (constant))) + constant = fold_convert (TREE_TYPE (target_type), constant); + vec.quick_push (constant); + } orig[1] = vec.build (); } tree blend_op2 = NULL_TREE; @@ -4224,6 +4245,16 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi) return false; blend_op2 = vec_perm_indices_to_tree (mask_type, indices); } + + /* For a real orig[1] (no splat, constant etc.) we might need to + nop-convert it. Do so here. */ + if (orig[1] && orig[1] != error_mark_node + && !useless_type_conversion_p (perm_type, TREE_TYPE (orig[1])) + && tree_nop_conversion_p (TREE_TYPE (perm_type), + TREE_TYPE (TREE_TYPE (orig[1])))) + orig[1] = gimple_build (&stmts, VIEW_CONVERT_EXPR, perm_type, + orig[1]); + tree orig1_for_perm = converted_orig1 ? build_zero_cst (perm_type) : orig[1]; tree res = gimple_build (&stmts, VEC_PERM_EXPR, perm_type,
