https://gcc.gnu.org/g:a82f188449c5d6a2382ef5456c9e98a582d5ba11
commit r16-5983-ga82f188449c5d6a2382ef5456c9e98a582d5ba11 Author: Robin Dapp <[email protected]> Date: Fri Nov 28 16:24:38 2025 +0100 optabs: Add else operand to LEN_LOAD. When adding else operands to maskload and friends we didn't bother to do the same for len_load (as we never use the residual elements anyway). In order to simplify handling in gimple-fold, this patch adds the else operand now. Both power and s390 zero out inactive elements. gcc/ChangeLog: * config/rs6000/predicates.md (lxvl_else_operand): New predicate. * config/rs6000/vsx.md: Add else operand. * config/s390/predicates.md (vll_else_operand): New predicate. * config/s390/vector.md: Add else operand. * doc/md.texi: Document else operand. * internal-fn.cc (internal_fn_len_index): Adjust IFN_LEN_LOAD. (internal_fn_else_index): Add IFN_LEN_LOAD. * optabs-tree.cc (target_supports_len_load_store_p): Get else value for len_load. * tree-vect-stmts.cc (vectorizable_load): Pun the else value type. Diff: --- gcc/config/rs6000/predicates.md | 5 +++++ gcc/config/rs6000/vsx.md | 7 ++++--- gcc/config/s390/predicates.md | 5 +++++ gcc/config/s390/vector.md | 7 ++++--- gcc/doc/md.texi | 20 +++++++++++--------- gcc/internal-fn.cc | 13 +++++++++---- gcc/optabs-tree.cc | 31 +++++++++++++++---------------- gcc/tree-vect-stmts.cc | 17 +++++++++++++---- 8 files changed, 66 insertions(+), 39 deletions(-) diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index 647e89afb6a7..5133dacd794b 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -2166,3 +2166,8 @@ (and (match_code "subreg") (match_test "subreg_lowpart_offset (mode, GET_MODE (SUBREG_REG (op))) == SUBREG_BYTE (op)"))) + +; Else operand for LEN_LOAD. 
+(define_predicate "lxvl_else_operand" + (and (match_code "const_vector") + (match_test "op == CONST0_RTX (GET_MODE (op))"))) diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index dd3573b80868..4d47833c9440 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -5798,13 +5798,14 @@ (define_expand "len_load_v16qi" [(match_operand:V16QI 0 "vlogical_operand") (match_operand:V16QI 1 "memory_operand") - (match_operand:QI 2 "gpc_reg_operand") - (match_operand:QI 3 "zero_constant")] + (match_operand:V16QI 2 "lxvl_else_operand") + (match_operand:QI 3 "gpc_reg_operand") + (match_operand:QI 4 "zero_constant")] "TARGET_P9_VECTOR && TARGET_64BIT" { rtx mem = XEXP (operands[1], 0); mem = force_reg (DImode, mem); - rtx len = gen_lowpart (DImode, operands[2]); + rtx len = gen_lowpart (DImode, operands[3]); emit_insn (gen_lxvl (operands[0], mem, len)); DONE; }) diff --git a/gcc/config/s390/predicates.md b/gcc/config/s390/predicates.md index c7b93bd7fcb9..e181399e91b1 100644 --- a/gcc/config/s390/predicates.md +++ b/gcc/config/s390/predicates.md @@ -614,3 +614,8 @@ (define_predicate "vll_bias_operand" (and (match_code "const_int") (match_test "op == CONSTM1_RTX (QImode)"))) + +; Else operand for LEN_LOAD. 
+(define_predicate "vll_else_operand" + (and (match_code "const_vector") + (match_test "op == CONST0_RTX (GET_MODE (op))"))) diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md index 375e3e81ae17..367389c3e585 100644 --- a/gcc/config/s390/vector.md +++ b/gcc/config/s390/vector.md @@ -3557,15 +3557,16 @@ (define_expand "len_load_v16qi" [(match_operand:V16QI 0 "register_operand") (match_operand:V16QI 1 "memory_operand") - (match_operand:QI 2 "register_operand") - (match_operand:QI 3 "vll_bias_operand") + (match_operand:V16QI 2 "vll_else_operand") + (match_operand:QI 3 "register_operand") + (match_operand:QI 4 "vll_bias_operand") ] "TARGET_VX && TARGET_64BIT" { rtx mem = adjust_address (operands[1], BLKmode, 0); rtx len = gen_reg_rtx (SImode); - emit_move_insn (len, gen_rtx_ZERO_EXTEND (SImode, operands[2])); + emit_move_insn (len, gen_rtx_ZERO_EXTEND (SImode, operands[3])); emit_insn (gen_vllv16qi (operands[0], len, mem)); DONE; }) diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi index 7bf2cc0aa1ba..86ed4ffe1e8e 100644 --- a/gcc/doc/md.texi +++ b/gcc/doc/md.texi @@ -5408,18 +5408,20 @@ This pattern is not allowed to @code{FAIL}. @cindex @code{len_load_@var{m}} instruction pattern @item @samp{len_load_@var{m}} -Load (operand 2 + operand 3) elements from memory operand 1 -into vector register operand 0, setting the other elements of -operand 0 to undefined values. Operands 0 and 1 have mode @var{m}, -which must be a vector mode. Operand 2 has whichever integer mode the -target prefers. Operand 3 conceptually has mode @code{QI}. - -Operand 2 can be a variable or a constant amount. Operand 3 specifies a +Load (operand 3 + operand 4) elements from memory operand 1 +into vector register operand 0. Operands 0 and 1 have mode @var{m}, +which must be a vector mode. Operand 3 has whichever integer mode the +target prefers. Operand 2 (the “else value”) is of mode @var{m} and +specifies which value is loaded for the remaining elements. 
The predicate +of operand 2 must only accept the else values that the target actually +supports. Operand 4 conceptually has mode @code{QI}. + +Operand 3 can be a variable or a constant amount. Operand 4 specifies a constant bias: it is either a constant 0 or a constant -1. The predicate on -operand 3 must only accept the bias values that the target actually supports. +operand 4 must only accept the bias values that the target actually supports. GCC handles a bias of 0 more efficiently than a bias of -1. -If (operand 2 + operand 3) exceeds the number of elements in mode +If (operand 3 + operand 4) exceeds the number of elements in mode @var{m}, the behavior is undefined. If the target prefers the length to be measured in bytes rather than diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc index 13fbd2ce7884..8df4f5008a15 100644 --- a/gcc/internal-fn.cc +++ b/gcc/internal-fn.cc @@ -5006,6 +5006,7 @@ internal_fn_len_index (internal_fn fn) switch (fn) { case IFN_LEN_LOAD: + return 3; case IFN_LEN_STORE: return 2; @@ -5071,6 +5072,9 @@ internal_fn_else_index (internal_fn fn) case IFN_COND_LEN_NOT: return 2; + case IFN_LEN_LOAD: + return 2; + case IFN_COND_ADD: case IFN_COND_SUB: case IFN_COND_MUL: @@ -5401,7 +5405,7 @@ internal_len_load_store_bias (internal_fn ifn, machine_mode mode) { optab optab = direct_internal_fn_optab (ifn); insn_code icode = direct_optab_handler (optab, mode); - int bias_no = 3; + int bias_idx = internal_fn_len_index (ifn) + 1; if (icode == CODE_FOR_nothing) { @@ -5412,22 +5416,23 @@ internal_len_load_store_bias (internal_fn ifn, machine_mode mode) { /* Try MASK_LEN_LOAD. */ optab = direct_internal_fn_optab (IFN_MASK_LEN_LOAD); + bias_idx = internal_fn_len_index (IFN_MASK_LEN_LOAD) + 1; } else { /* Try MASK_LEN_STORE. 
*/ optab = direct_internal_fn_optab (IFN_MASK_LEN_STORE); + bias_idx = internal_fn_len_index (IFN_MASK_LEN_STORE) + 1; } icode = convert_optab_handler (optab, mode, mask_mode); - bias_no = 4; } if (icode != CODE_FOR_nothing) { /* For now we only support biases of 0 or -1. Try both of them. */ - if (insn_operand_matches (icode, bias_no, GEN_INT (0))) + if (insn_operand_matches (icode, bias_idx, GEN_INT (0))) return 0; - if (insn_operand_matches (icode, bias_no, GEN_INT (-1))) + if (insn_operand_matches (icode, bias_idx, GEN_INT (-1))) return -1; } diff --git a/gcc/optabs-tree.cc b/gcc/optabs-tree.cc index 0de74c7966af..53788b9259bd 100644 --- a/gcc/optabs-tree.cc +++ b/gcc/optabs-tree.cc @@ -615,28 +615,27 @@ target_supports_len_load_store_p (machine_mode mode, bool is_load, { optab op = is_load ? len_load_optab : len_store_optab; optab masked_op = is_load ? mask_len_load_optab : mask_len_store_optab; + internal_fn which_ifn; - if (direct_optab_handler (op, mode)) + enum insn_code icode; + if ((icode = direct_optab_handler (op, mode)) != CODE_FOR_nothing) { - if (ifn) - *ifn = is_load ? IFN_LEN_LOAD : IFN_LEN_STORE; - return true; + which_ifn = is_load ? IFN_LEN_LOAD : IFN_LEN_STORE; } machine_mode mask_mode; - enum insn_code icode; - if (targetm.vectorize.get_mask_mode (mode).exists (&mask_mode) + if (!icode + && targetm.vectorize.get_mask_mode (mode).exists (&mask_mode) && ((icode = convert_optab_handler (masked_op, mode, mask_mode)) != CODE_FOR_nothing)) - { - if (ifn) - *ifn = is_load ? IFN_MASK_LEN_LOAD : IFN_MASK_LEN_STORE; - if (elsvals && is_load) - get_supported_else_vals (icode, - internal_fn_else_index (IFN_MASK_LEN_LOAD), - *elsvals); - return true; - } - return false; + which_ifn = is_load ? 
IFN_MASK_LEN_LOAD : IFN_MASK_LEN_STORE; + + if (icode && elsvals && is_load) + get_supported_else_vals (icode, internal_fn_else_index (which_ifn), + *elsvals); + + if (icode && ifn) + *ifn = which_ifn; + return icode; } /* If target supports vector load/store with length for vector mode MODE, diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index 95f015f92980..56ff1c846d1e 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -11392,10 +11392,18 @@ vectorizable_load (vec_info *vinfo, { tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT); gcall *call; + + /* Need conversion if the vectype is punned by VnQI. */ + els_vectype = vectype; + if (vmode != new_vmode) + els_vectype + = build_vector_type_for_mode (unsigned_intQI_type_node, + new_vmode); + vec_els = vect_get_mask_load_else (maskload_elsval, + els_vectype); + if (partial_ifn == IFN_MASK_LEN_LOAD) { - vec_els = vect_get_mask_load_else (maskload_elsval, - vectype); if (type_mode_padding_p && maskload_elsval != MASK_LOAD_ELSE_ZERO) need_zeroing = true; @@ -11405,9 +11413,10 @@ vectorizable_load (vec_info *vinfo, final_len, bias); } else - call = gimple_build_call_internal (IFN_LEN_LOAD, 4, + call = gimple_build_call_internal (IFN_LEN_LOAD, 5, dataref_ptr, ptr, - final_len, bias); + vec_els, final_len, + bias); gimple_call_set_nothrow (call, true); new_stmt = call; data_ref = NULL_TREE;
