commit: 6baf8ceed58f657d50e941e6943aca1f9cff5c69 Author: Sam James <sam <AT> gentoo <DOT> org> AuthorDate: Fri Dec 19 23:04:07 2025 +0000 Commit: Sam James <sam <AT> gentoo <DOT> org> CommitDate: Fri Dec 19 23:04:07 2025 +0000 URL: https://gitweb.gentoo.org/proj/gcc-patches.git/commit/?id=6baf8cee
16.0.0: fix vectoriser wrong-code Bug: https://gcc.gnu.org/PR123089 Signed-off-by: Sam James <sam <AT> gentoo.org> 16.0.0/gentoo/88_all_PR123089.patch | 648 ++++++++++++++++++++++++++++++++++++ 16.0.0/gentoo/README.history | 1 + 2 files changed, 649 insertions(+) diff --git a/16.0.0/gentoo/88_all_PR123089.patch b/16.0.0/gentoo/88_all_PR123089.patch new file mode 100644 index 0000000..7e55ad1 --- /dev/null +++ b/16.0.0/gentoo/88_all_PR123089.patch @@ -0,0 +1,648 @@ +From 4d8a8d4e44c55f16818f0b44c1372f28dbb8a46c Mon Sep 17 00:00:00 2001 +Message-ID: <4d8a8d4e44c55f16818f0b44c1372f28dbb8a46c.1766185423.git....@gentoo.org> +From: Tamar Christina <[email protected]> +Date: Fri, 19 Dec 2025 16:15:47 +0000 +Subject: [PATCH] use wider precision type for generating early break scalar IV + [PR123089] + +In the PR we see that the new scalar IV tricks other passes to think there's an +overflow to the use of a signed counter: + +The loop is known to iterate 8191 times and we have a VF of 8 and it starts +at 2. + +The codegen out of the vectorizer is the same as before, except we now have a +scalar variable counting the scalar iteration count vs a vector one. + +i.e. we have + +_45 = _39 + 8; + +vs + +_46 = _45 + { 16, 16, 16, 16, ... } + +we pick a lower VF now since costing allows it to but that's not important. + +When we get to cunroll since the value is now scalar, it sees that 8 * 8191 +would overflow a signed short and so it changes the loop bounds to the largest +possible signed value and then uses this to elide the ivtmp_50 < 8191 as always +true and so you get an infinite loop: + +Analyzing # of iterations of loop 1 + exit condition [1, + , 1](no_overflow) < 8191 + bounds on difference of bases: 8190 ... 8190 + result: + # of iterations 8190, bounded by 8190 +Statement (exit)if (ivtmp_50 < 8191) + is executed at most 8190 (bounded by 8190) + 1 times in loop 1. +Induction variable (signed short) 8 + 8 * iteration does not wrap in statement +_45 = _39 + 8; + in loop 1. 
+Statement _45 = _39 + 8; + is executed at most 4094 (bounded by 4094) + 1 times in loop 1. + +The signed type was originally chosen because of the negative offset we use when +adjusting for peeling for alignment with masks. However, this then introduces +issues with signed overflow, as we see here. This patch instead determines the +smallest possible unsigned type for use by the scalar IV where the overflow +won't happen when we include the extra bit for the sign. i.e. if the scalar IV +is an unsigned 8-bit value we pick a signed 16-bit type. But if a signed 8-bit +value we pick an unsigned 8-bit type. + +We use the initial niters value to determine the smallest size possible, to +prevent cases where the IV in the code is 64-bit from needing a TImode +counter. I also only require the additional bit when I know we'll be generating +the SMAX. I've now moved this to vectorizable_early_exit so that if we do +end up needing something like TImode we don't vectorize when the target +doesn't support it. + +I've also added some testcases for masking around the boundary values. I've +only added them for char to reduce the runtime of the tests. + +Bootstrapped and regtested on aarch64-none-linux-gnu, +arm-none-linux-gnueabihf, x86_64-pc-linux-gnu +-m32, -m64 and no issues. + +Any final comments? Otherwise I will push tomorrow. + +Thanks, +Tamar + +gcc/ChangeLog: + + PR tree-optimization/123089 + * tree-vect-loop.cc (vect_update_ivs_after_vectorizer_for_early_breaks): + Add conversion if required.  Note that if we did truncate, the original + scalar loop had an overflow here anyway. + (vect_min_prec_for_max_niters): Expose. + * tree-vect-stmts.cc (vect_compute_type_for_early_break_scalar_iv): New. + (vectorizable_early_exit): Find smallest type where we won't have UB in + the signed IV and store it. + * tree-vectorizer.h (LOOP_VINFO_EARLY_BRK_IV_TYPE): New. + (class _loop_vec_info): Add early_break_iv_type. + (vect_min_prec_for_max_niters): New. 
+ * tree-vect-loop-manip.cc (vect_do_peeling): Use it. + +gcc/testsuite/ChangeLog: + +PR tree-optimization/123089 + * gcc.dg/vect/vect-early-break_141-pr123089.c: New test. + * gcc.target/aarch64/sve/peel_ind_14.c: New test. + * gcc.target/aarch64/sve/peel_ind_14_run.c: New test. + * gcc.target/aarch64/sve/peel_ind_15.c: New test. + * gcc.target/aarch64/sve/peel_ind_15_run.c: New test. + * gcc.target/aarch64/sve/peel_ind_16.c: New test. + * gcc.target/aarch64/sve/peel_ind_16_run.c: New test. + * gcc.target/aarch64/sve/peel_ind_17.c: New test. + * gcc.target/aarch64/sve/peel_ind_17_run.c: New test. +--- + .../vect/vect-early-break_141-pr123089.c | 40 ++++++++++++ + .../gcc.target/aarch64/sve/peel_ind_14.c | 24 +++++++ + .../gcc.target/aarch64/sve/peel_ind_14_run.c | 42 ++++++++++++ + .../gcc.target/aarch64/sve/peel_ind_15.c | 24 +++++++ + .../gcc.target/aarch64/sve/peel_ind_15_run.c | 42 ++++++++++++ + .../gcc.target/aarch64/sve/peel_ind_16.c | 24 +++++++ + .../gcc.target/aarch64/sve/peel_ind_16_run.c | 41 ++++++++++++ + .../gcc.target/aarch64/sve/peel_ind_17.c | 24 +++++++ + .../gcc.target/aarch64/sve/peel_ind_17_run.c | 41 ++++++++++++ + gcc/tree-vect-loop-manip.cc | 6 +- + gcc/tree-vect-loop.cc | 15 +++-- + gcc/tree-vect-stmts.cc | 64 +++++++++++++++++++ + gcc/tree-vectorizer.h | 7 +- + 13 files changed, 384 insertions(+), 10 deletions(-) + create mode 100644 gcc/testsuite/gcc.dg/vect/vect-early-break_141-pr123089.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/peel_ind_14.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/peel_ind_14_run.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/peel_ind_15.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/peel_ind_15_run.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/peel_ind_16.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/peel_ind_16_run.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/peel_ind_17.c + create mode 100644 
gcc/testsuite/gcc.target/aarch64/sve/peel_ind_17_run.c + +diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_141-pr123089.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_141-pr123089.c +new file mode 100644 +index 000000000000..431edbfbde67 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_141-pr123089.c +@@ -0,0 +1,40 @@ ++/* { dg-add-options vect_early_break } */ ++/* { dg-require-effective-target vect_early_break_hw } */ ++/* { dg-require-effective-target vect_int } */ ++/* { dg-require-effective-target avx2_runtime { target { i?86-*-* x86_64-*-* } } } */ ++ ++/* { dg-additional-options "-O3 -fno-strict-aliasing -march=znver3" { target { i?86-*-* x86_64-*-* } } } */ ++/* { dg-final { scan-tree-dump "loop vectorized" "vect" { target { i?86-*-* x86_64-*-* } } } } */ ++ ++#include "tree-vect.h" ++ ++struct ++{ ++ int d; ++ short e; ++} i; ++ ++int b; ++int *h = &b; ++ ++int ++main () ++{ ++ check_vect (); ++ ++ short f = 1; ++ short *g = &i.e; ++ ++a: ++ if (*g = 0 & ++f, *h) ++ ; ++ else ++ { ++ int c = 0; ++ if (f) ++ goto a; ++ h = &c; ++ } ++ ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_14.c b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_14.c +new file mode 100644 +index 000000000000..b2f4650bb2ca +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_14.c +@@ -0,0 +1,24 @@ ++/* Fix for PR123089 alignment peeling with vectors and VLS and overflows. 
*/ ++/* { dg-do compile } */ ++/* { dg-options "-Ofast --param aarch64-autovec-preference=sve-only -fdump-tree-vect-details" } */ ++/* { dg-additional-options "-msve-vector-bits=256" { target aarch64_sve256_hw } } */ ++/* { dg-additional-options "-msve-vector-bits=128" { target aarch64_sve128_hw } } */ ++ ++/* { dg-final { scan-tree-dump "loop vectorized" "vect" } } */ ++ ++#define START 2 ++ ++int __attribute__((noipa)) ++foo (unsigned char n, int *x) ++{ ++ unsigned char i = 0; ++#pragma GCC unroll 0 ++ for (i = START; i < n; ++i) ++ { ++ if (x[i] == 0) ++ return i; ++ x[i] += 1; ++ } ++ return i; ++} ++ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_14_run.c b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_14_run.c +new file mode 100644 +index 000000000000..fab939bb25e4 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_14_run.c +@@ -0,0 +1,42 @@ ++/* Fix for PR123089 alignment peeling with vectors and VLS and overflows. */ ++/* { dg-do run { target aarch64_sve_hw } } */ ++/* { dg-options "-Ofast --param aarch64-autovec-preference=sve-only" } */ ++/* { dg-additional-options "-msve-vector-bits=256" { target aarch64_sve256_hw } } */ ++/* { dg-additional-options "-msve-vector-bits=128" { target aarch64_sve128_hw } } */ ++ ++#define START 2 ++ ++int __attribute__((noipa)) ++foo (unsigned char n, int *x) ++{ ++ unsigned char i = 0; ++#pragma GCC unroll 0 ++ for (i = START; i < n; ++i) ++ { ++ if (x[i] == 0) ++ return i; ++ x[i] += 1; ++ } ++ return i; ++} ++ ++int main () ++{ ++ int max = 255 - START; ++ int x[255 - START]; ++#pragma GCC unroll 0 ++ for (int i = 0; i < max; i++) ++ x[i] = 1; ++ ++ x[200] = 0; ++ int res = foo (max, x); ++ if (res != 200) ++ __builtin_abort (); ++ ++ if (x[START] != 2) ++ __builtin_abort (); ++ ++ if (x[0] != 1) ++ __builtin_abort (); ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_15.c b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_15.c +new file mode 100644 +index 
000000000000..b2f4650bb2ca +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_15.c +@@ -0,0 +1,24 @@ ++/* Fix for PR123089 alignment peeling with vectors and VLS and overflows. */ ++/* { dg-do compile } */ ++/* { dg-options "-Ofast --param aarch64-autovec-preference=sve-only -fdump-tree-vect-details" } */ ++/* { dg-additional-options "-msve-vector-bits=256" { target aarch64_sve256_hw } } */ ++/* { dg-additional-options "-msve-vector-bits=128" { target aarch64_sve128_hw } } */ ++ ++/* { dg-final { scan-tree-dump "loop vectorized" "vect" } } */ ++ ++#define START 2 ++ ++int __attribute__((noipa)) ++foo (unsigned char n, int *x) ++{ ++ unsigned char i = 0; ++#pragma GCC unroll 0 ++ for (i = START; i < n; ++i) ++ { ++ if (x[i] == 0) ++ return i; ++ x[i] += 1; ++ } ++ return i; ++} ++ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_15_run.c b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_15_run.c +new file mode 100644 +index 000000000000..13763f5ebfbe +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_15_run.c +@@ -0,0 +1,42 @@ ++/* Fix for PR123089 alignment peeling with vectors and VLS and overflows. 
*/ ++/* { dg-do run { target aarch64_sve_hw } } */ ++/* { dg-options "-Ofast --param aarch64-autovec-preference=sve-only" } */ ++/* { dg-additional-options "-msve-vector-bits=256" { target aarch64_sve256_hw } } */ ++/* { dg-additional-options "-msve-vector-bits=128" { target aarch64_sve128_hw } } */ ++ ++#define START 2 ++ ++int __attribute__((noipa)) ++foo (unsigned char n, int *x) ++{ ++ unsigned char i = 0; ++#pragma GCC unroll 0 ++ for (i = START; i < n; ++i) ++ { ++ if (x[i] == 0) ++ return i; ++ x[i] += 1; ++ } ++ return i; ++} ++ ++int main () ++{ ++ int max = 255 - START; ++ int x[255 - START]; ++#pragma GCC unroll 0 ++ for (int i = 0; i < max; i++) ++ x[i] = 1; ++ ++ x[33] = 0; ++ int res = foo (max, x); ++ if (res != 33) ++ __builtin_abort (); ++ ++ if (x[START] != 2) ++ __builtin_abort (); ++ ++ if (x[0] != 1) ++ __builtin_abort (); ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_16.c b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_16.c +new file mode 100644 +index 000000000000..b2f4650bb2ca +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_16.c +@@ -0,0 +1,24 @@ ++/* Fix for PR123089 alignment peeling with vectors and VLS and overflows. 
*/ ++/* { dg-do compile } */ ++/* { dg-options "-Ofast --param aarch64-autovec-preference=sve-only -fdump-tree-vect-details" } */ ++/* { dg-additional-options "-msve-vector-bits=256" { target aarch64_sve256_hw } } */ ++/* { dg-additional-options "-msve-vector-bits=128" { target aarch64_sve128_hw } } */ ++ ++/* { dg-final { scan-tree-dump "loop vectorized" "vect" } } */ ++ ++#define START 2 ++ ++int __attribute__((noipa)) ++foo (unsigned char n, int *x) ++{ ++ unsigned char i = 0; ++#pragma GCC unroll 0 ++ for (i = START; i < n; ++i) ++ { ++ if (x[i] == 0) ++ return i; ++ x[i] += 1; ++ } ++ return i; ++} ++ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_16_run.c b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_16_run.c +new file mode 100644 +index 000000000000..120f737d2312 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_16_run.c +@@ -0,0 +1,41 @@ ++/* Fix for PR123089 alignment peeling with vectors and VLS and overflows. */ ++/* { dg-do run { target aarch64_sve_hw } } */ ++/* { dg-options "-Ofast --param aarch64-autovec-preference=sve-only" } */ ++/* { dg-additional-options "-msve-vector-bits=256" { target aarch64_sve256_hw } } */ ++/* { dg-additional-options "-msve-vector-bits=128" { target aarch64_sve128_hw } } */ ++ ++#define START 2 ++ ++int __attribute__((noipa)) ++foo (unsigned char n, int *x) ++{ ++ unsigned char i = 0; ++#pragma GCC unroll 0 ++ for (i = START; i < n; ++i) ++ { ++ if (x[i] == 0) ++ return i; ++ x[i] += 1; ++ } ++ return i; ++} ++ ++int main () ++{ ++ int max = 255 - START; ++ int x[255 - START]; ++#pragma GCC unroll 0 ++ for (int i = 0; i < max; i++) ++ x[i] = 1; ++ ++ int res = foo (max, x); ++ if (res != max) ++ __builtin_abort (); ++ ++ if (x[START] != 2) ++ __builtin_abort (); ++ ++ if (x[0] != 1) ++ __builtin_abort (); ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_17.c b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_17.c +new file mode 100644 +index 
000000000000..5395a759c612 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_17.c +@@ -0,0 +1,24 @@ ++/* Fix for PR123089 alignment peeling with vectors and VLS and overflows. */ ++/* { dg-do compile } */ ++/* { dg-options "-Ofast --param aarch64-autovec-preference=sve-only -fdump-tree-vect-details" } */ ++/* { dg-additional-options "-msve-vector-bits=256" { target aarch64_sve256_hw } } */ ++/* { dg-additional-options "-msve-vector-bits=128" { target aarch64_sve128_hw } } */ ++ ++/* { dg-final { scan-tree-dump "loop vectorized" "vect" } } */ ++ ++#define START 2 ++ ++int __attribute__((noipa)) ++foo (int *x) ++{ ++ unsigned long i = 0; ++#pragma GCC unroll 0 ++ for (i = START; i < 253; ++i) ++ { ++ if (x[i] == 0) ++ return i; ++ x[i] += 1; ++ } ++ return i; ++} ++ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_17_run.c b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_17_run.c +new file mode 100644 +index 000000000000..73163507f66b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_17_run.c +@@ -0,0 +1,41 @@ ++/* Fix for PR123089 alignment peeling with vectors and VLS and overflows. 
*/ ++/* { dg-do run { target aarch64_sve_hw } } */ ++/* { dg-options "-Ofast --param aarch64-autovec-preference=sve-only" } */ ++/* { dg-additional-options "-msve-vector-bits=256" { target aarch64_sve256_hw } } */ ++/* { dg-additional-options "-msve-vector-bits=128" { target aarch64_sve128_hw } } */ ++ ++#define START 2 ++ ++int __attribute__((noipa)) ++foo (int *x) ++{ ++ unsigned int i = 0; ++#pragma GCC unroll 0 ++ for (i = START; i < 253; ++i) ++ { ++ if (x[i] == 0) ++ return i; ++ x[i] += 1; ++ } ++ return i; ++} ++ ++int main () ++{ ++ int x[255 - START]; ++#pragma GCC unroll 0 ++ for (int i = 0; i < 253; i++) ++ x[i] = 1; ++ ++ x[200] = 0; ++ int res = foo (x); ++ if (res != 200) ++ __builtin_abort (); ++ ++ if (x[START] != 2) ++ __builtin_abort (); ++ ++ if (x[0] != 1) ++ __builtin_abort (); ++ return 0; ++} +diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc +index 624d289156c2..8b4542365281 100644 +--- a/gcc/tree-vect-loop-manip.cc ++++ b/gcc/tree-vect-loop-manip.cc +@@ -3738,10 +3738,8 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, + tree vector_iters_vf = niters_vector_mult_vf; + if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)) + { +- tree vector_iters_vf_type = uncounted_p ? sizetype +- : TREE_TYPE (vector_iters_vf); +- tree scal_iv_ty = signed_type_for (vector_iters_vf_type); +- tree tmp_niters_vf = make_ssa_name (scal_iv_ty); ++ tree tmp_niters_vf ++ = make_ssa_name (LOOP_VINFO_EARLY_BRK_IV_TYPE (loop_vinfo)); + + if (!(LOOP_VINFO_NITERS_UNCOUNTED_P (loop_vinfo) + && get_loop_exit_edges (loop).length () == 1)) +diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc +index b15589a711b1..0c275a9edeb4 100644 +--- a/gcc/tree-vect-loop.cc ++++ b/gcc/tree-vect-loop.cc +@@ -928,7 +928,7 @@ vect_get_max_nscalars_per_iter (loop_vec_info loop_vinfo) + as an unsigned integer, where MAX_NITERS is the maximum number of + loop header iterations for the original scalar form of LOOP_VINFO. 
*/ + +-static unsigned ++unsigned + vect_min_prec_for_max_niters (loop_vec_info loop_vinfo, unsigned int factor) + { + class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); +@@ -11055,10 +11055,15 @@ vect_update_ivs_after_vectorizer_for_early_breaks (loop_vec_info loop_vinfo) + final IV. */ + if (niters_skip) + { +- induc_def = gimple_build (&iv_stmts, MAX_EXPR, TREE_TYPE (induc_def), +- induc_def, +- build_zero_cst (TREE_TYPE (induc_def))); +- auto stmt = gimple_build_assign (phi_var, induc_def); ++ tree induc_type = TREE_TYPE (induc_def); ++ tree s_induc_type = signed_type_for (induc_type); ++ induc_def = gimple_build (&iv_stmts, MAX_EXPR, s_induc_type, ++ gimple_convert (&iv_stmts, s_induc_type, ++ induc_def), ++ build_zero_cst (s_induc_type)); ++ auto stmt = gimple_build_assign (phi_var, ++ gimple_convert (&iv_stmts, induc_type, ++ induc_def)); + gimple_seq_add_stmt_without_update (&iv_stmts, stmt); + basic_block exit_bb = NULL; + /* Identify the early exit merge block. I wish we had stored this. */ +diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc +index d5a50a39409b..a2f345c97d1c 100644 +--- a/gcc/tree-vect-stmts.cc ++++ b/gcc/tree-vect-stmts.cc +@@ -12784,6 +12784,67 @@ supports_vector_compare_and_branch (loop_vec_info loop_vinfo, machine_mode mode) + return direct_optab_handler (cbranch_optab, mode) != CODE_FOR_nothing; + } + ++/* Determine the type to use for early break vectorization's scalar IV. If ++ no type is possible return false. */ ++ ++static bool ++vect_compute_type_for_early_break_scalar_iv (loop_vec_info loop_vinfo) ++{ ++ /* Check if we have a usable scalar IV type for vectorization. */ ++ tree iters_vf_type = sizetype; ++ if (!LOOP_VINFO_NITERS_UNCOUNTED_P (loop_vinfo)) ++ { ++ /* Find the type with the minimum precision we can use ++ for the scalar IV. */ ++ tree cand_type = TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)); ++ ++ /* Work out how many bits we need to represent the limit. 
*/ ++ unsigned int min_ni_width ++ = vect_min_prec_for_max_niters (loop_vinfo, 1); ++ ++ /* Check if we're using PFA, if so we need a signed IV and an ++ extra bit for the sign. */ ++ if (TYPE_UNSIGNED (cand_type) ++ && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) ++ && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)) ++ min_ni_width += 1; ++ ++ if (TYPE_PRECISION (cand_type) >= min_ni_width) ++ iters_vf_type = unsigned_type_for (cand_type); ++ else ++ { ++ opt_scalar_int_mode cmp_mode_iter; ++ tree iv_type = NULL_TREE; ++ FOR_EACH_MODE_IN_CLASS (cmp_mode_iter, MODE_INT) ++ { ++ auto cmp_mode = cmp_mode_iter.require (); ++ unsigned int cmp_bits = GET_MODE_BITSIZE (cmp_mode); ++ if (cmp_bits >= min_ni_width ++ && targetm.scalar_mode_supported_p (cmp_mode)) ++ { ++ iv_type = build_nonstandard_integer_type (cmp_bits, true); ++ if (iv_type) ++ break; ++ } ++ } ++ ++ if (!iv_type) ++ { ++ if (dump_enabled_p ()) ++ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, ++ "can't vectorize early exit because the " ++ "target doesn't support a scalar type wide " ++ "wide enough to hold niters.\n"); ++ return false; ++ } ++ iters_vf_type = iv_type; ++ } ++ } ++ ++ LOOP_VINFO_EARLY_BRK_IV_TYPE (loop_vinfo) = iters_vf_type; ++ return true; ++} ++ + /* Check to see if the current early break given in STMT_INFO is valid for + vectorization. */ + +@@ -12897,6 +12958,9 @@ vectorizable_early_exit (loop_vec_info loop_vinfo, stmt_vec_info stmt_info, + vect_record_loop_mask (loop_vinfo, masks, vec_num, vectype, NULL); + } + ++ if (!vect_compute_type_for_early_break_scalar_iv (loop_vinfo)) ++ return false; ++ + return true; + } + +diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h +index 8f7483297ea8..840af10a7a23 100644 +--- a/gcc/tree-vectorizer.h ++++ b/gcc/tree-vectorizer.h +@@ -1249,6 +1249,10 @@ public: + inside the relavent exit blocks in order to adjust for early break. 
*/ + tree early_break_niters_var; + ++ /* The type of the variable to be used to create the scalar IV for early break ++ loops. */ ++ tree early_break_iv_type; ++ + /* Record statements that are needed to be live for early break vectorization + but may not have an LC PHI node materialized yet in the exits. */ + auto_vec<stmt_vec_info> early_break_live_ivs; +@@ -1320,6 +1324,7 @@ public: + #define LOOP_VINFO_EARLY_BRK_DEST_BB(L) (L)->early_break_dest_bb + #define LOOP_VINFO_EARLY_BRK_VUSES(L) (L)->early_break_vuses + #define LOOP_VINFO_EARLY_BRK_NITERS_VAR(L) (L)->early_break_niters_var ++#define LOOP_VINFO_EARLY_BRK_IV_TYPE(L) (L)->early_break_iv_type + #define LOOP_VINFO_LOOP_CONDS(L) (L)->conds + #define LOOP_VINFO_LOOP_IV_COND(L) (L)->loop_iv_cond + #define LOOP_VINFO_NO_DATA_DEPENDENCIES(L) (L)->no_data_dependencies +@@ -2676,7 +2681,7 @@ extern tree vect_gen_loop_len_mask (loop_vec_info, gimple_stmt_iterator *, + extern gimple_seq vect_gen_len (tree, tree, tree, tree); + extern vect_reduc_info info_for_reduction (loop_vec_info, slp_tree); + extern bool reduction_fn_for_scalar_code (code_helper, internal_fn *); +- ++extern unsigned vect_min_prec_for_max_niters (loop_vec_info, unsigned int); + /* Drive for loop transformation stage. */ + extern class loop *vect_transform_loop (loop_vec_info, gimple *); + struct vect_loop_form_info + +base-commit: 68501f9d89e56d53d69d899cebb45b81ea5687ac +-- +2.52.0 + diff --git a/16.0.0/gentoo/README.history b/16.0.0/gentoo/README.history index a900b7e..5277f5a 100644 --- a/16.0.0/gentoo/README.history +++ b/16.0.0/gentoo/README.history @@ -3,6 +3,7 @@ - 86_all_PR122456-pgo-workaround.patch + 86_all_PR122794-libtool.patch + 87_all_PR123152-vect-Fix-dominator-update.patch + + 88_all_PR123089.patch 27 14 December 2025
