https://gcc.gnu.org/g:a82f188449c5d6a2382ef5456c9e98a582d5ba11

commit r16-5983-ga82f188449c5d6a2382ef5456c9e98a582d5ba11
Author: Robin Dapp <[email protected]>
Date:   Fri Nov 28 16:24:38 2025 +0100

    optabs: Add else operand to LEN_LOAD.
    
    When adding else operands to maskload and friends we didn't bother to do
    the same for len_load (as we never use the residual elements anyway).
    In order to simplify handling in gimple-fold, this patch adds the else
    operand now.  Both power and s390 zero out inactive elements.
    
    gcc/ChangeLog:
    
            * config/rs6000/predicates.md (lxvl_else_operand): New
            predicate.
            * config/rs6000/vsx.md: Add else operand.
            * config/s390/predicates.md (vll_else_operand): New predicate.
            * config/s390/vector.md: Add else operand.
            * doc/md.texi: Document else operand.
            * internal-fn.cc (internal_fn_len_index): Adjust IFN_LEN_LOAD.
            (internal_fn_else_index): Add IFN_LEN_LOAD.
            * optabs-tree.cc (target_supports_len_load_store_p): Get else
            value for len_load.
            * tree-vect-stmts.cc (vectorizable_load): Pun the else value
            type.

Diff:
---
 gcc/config/rs6000/predicates.md |  5 +++++
 gcc/config/rs6000/vsx.md        |  7 ++++---
 gcc/config/s390/predicates.md   |  5 +++++
 gcc/config/s390/vector.md       |  7 ++++---
 gcc/doc/md.texi                 | 20 +++++++++++---------
 gcc/internal-fn.cc              | 13 +++++++++----
 gcc/optabs-tree.cc              | 31 +++++++++++++++----------------
 gcc/tree-vect-stmts.cc          | 17 +++++++++++++----
 8 files changed, 66 insertions(+), 39 deletions(-)

diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 647e89afb6a7..5133dacd794b 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -2166,3 +2166,8 @@
   (and (match_code "subreg")
        (match_test "subreg_lowpart_offset (mode, GET_MODE (SUBREG_REG (op)))
                    == SUBREG_BYTE (op)")))
+
+; Else operand for LEN_LOAD.
+(define_predicate "lxvl_else_operand"
+  (and (match_code "const_vector")
+       (match_test "op == CONST0_RTX (GET_MODE (op))")))
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index dd3573b80868..4d47833c9440 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -5798,13 +5798,14 @@
 (define_expand "len_load_v16qi"
   [(match_operand:V16QI 0 "vlogical_operand")
    (match_operand:V16QI 1 "memory_operand")
-   (match_operand:QI 2 "gpc_reg_operand")
-   (match_operand:QI 3 "zero_constant")]
+   (match_operand:V16QI 2 "lxvl_else_operand")
+   (match_operand:QI 3 "gpc_reg_operand")
+   (match_operand:QI 4 "zero_constant")]
   "TARGET_P9_VECTOR && TARGET_64BIT"
 {
   rtx mem = XEXP (operands[1], 0);
   mem = force_reg (DImode, mem);
-  rtx len = gen_lowpart (DImode, operands[2]);
+  rtx len = gen_lowpart (DImode, operands[3]);
   emit_insn (gen_lxvl (operands[0], mem, len));
   DONE;
 })
diff --git a/gcc/config/s390/predicates.md b/gcc/config/s390/predicates.md
index c7b93bd7fcb9..e181399e91b1 100644
--- a/gcc/config/s390/predicates.md
+++ b/gcc/config/s390/predicates.md
@@ -614,3 +614,8 @@
 (define_predicate "vll_bias_operand"
   (and (match_code "const_int")
        (match_test "op == CONSTM1_RTX (QImode)")))
+
+; Else operand for LEN_LOAD.
+(define_predicate "vll_else_operand"
+  (and (match_code "const_vector")
+       (match_test "op == CONST0_RTX (GET_MODE (op))")))
diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
index 375e3e81ae17..367389c3e585 100644
--- a/gcc/config/s390/vector.md
+++ b/gcc/config/s390/vector.md
@@ -3557,15 +3557,16 @@
 (define_expand "len_load_v16qi"
   [(match_operand:V16QI 0 "register_operand")
    (match_operand:V16QI 1 "memory_operand")
-   (match_operand:QI 2 "register_operand")
-   (match_operand:QI 3 "vll_bias_operand")
+   (match_operand:V16QI 2 "vll_else_operand")
+   (match_operand:QI 3 "register_operand")
+   (match_operand:QI 4 "vll_bias_operand")
   ]
   "TARGET_VX && TARGET_64BIT"
 {
   rtx mem = adjust_address (operands[1], BLKmode, 0);
 
   rtx len = gen_reg_rtx (SImode);
-  emit_move_insn (len, gen_rtx_ZERO_EXTEND (SImode, operands[2]));
+  emit_move_insn (len, gen_rtx_ZERO_EXTEND (SImode, operands[3]));
   emit_insn (gen_vllv16qi (operands[0], len, mem));
   DONE;
 })
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 7bf2cc0aa1ba..86ed4ffe1e8e 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -5408,18 +5408,20 @@ This pattern is not allowed to @code{FAIL}.
 
 @cindex @code{len_load_@var{m}} instruction pattern
 @item @samp{len_load_@var{m}}
-Load (operand 2 + operand 3) elements from memory operand 1
-into vector register operand 0, setting the other elements of
-operand 0 to undefined values.  Operands 0 and 1 have mode @var{m},
-which must be a vector mode.  Operand 2 has whichever integer mode the
-target prefers.  Operand 3 conceptually has mode @code{QI}.
-
-Operand 2 can be a variable or a constant amount.  Operand 3 specifies a
+Load (operand 3 + operand 4) elements from memory operand 1
+into vector register operand 0.  Operands 0 and 1 have mode @var{m},
+which must be a vector mode.  Operand 3 has whichever integer mode the
+target prefers.  Operand 2 (the “else value”) is of mode @var{m} and
+specifies which value is loaded for the remaining elements.  The predicate
+of operand 2 must only accept the else values that the target actually
+supports.  Operand 4 conceptually has mode @code{QI}.
+
+Operand 3 can be a variable or a constant amount.  Operand 4 specifies a
 constant bias: it is either a constant 0 or a constant -1.  The predicate on
-operand 3 must only accept the bias values that the target actually supports.
+operand 4 must only accept the bias values that the target actually supports.
 GCC handles a bias of 0 more efficiently than a bias of -1.
 
-If (operand 2 + operand 3) exceeds the number of elements in mode
+If (operand 3 + operand 4) exceeds the number of elements in mode
 @var{m}, the behavior is undefined.
 
 If the target prefers the length to be measured in bytes rather than
diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
index 13fbd2ce7884..8df4f5008a15 100644
--- a/gcc/internal-fn.cc
+++ b/gcc/internal-fn.cc
@@ -5006,6 +5006,7 @@ internal_fn_len_index (internal_fn fn)
   switch (fn)
     {
     case IFN_LEN_LOAD:
+      return 3;
     case IFN_LEN_STORE:
       return 2;
 
@@ -5071,6 +5072,9 @@ internal_fn_else_index (internal_fn fn)
     case IFN_COND_LEN_NOT:
       return 2;
 
+    case IFN_LEN_LOAD:
+      return 2;
+
     case IFN_COND_ADD:
     case IFN_COND_SUB:
     case IFN_COND_MUL:
@@ -5401,7 +5405,7 @@ internal_len_load_store_bias (internal_fn ifn, machine_mode mode)
 {
   optab optab = direct_internal_fn_optab (ifn);
   insn_code icode = direct_optab_handler (optab, mode);
-  int bias_no = 3;
+  int bias_idx = internal_fn_len_index (ifn) + 1;
 
   if (icode == CODE_FOR_nothing)
     {
@@ -5412,22 +5416,23 @@ internal_len_load_store_bias (internal_fn ifn, machine_mode mode)
        {
          /* Try MASK_LEN_LOAD.  */
          optab = direct_internal_fn_optab (IFN_MASK_LEN_LOAD);
+         bias_idx = internal_fn_len_index (IFN_MASK_LEN_LOAD) + 1;
        }
       else
        {
          /* Try MASK_LEN_STORE.  */
          optab = direct_internal_fn_optab (IFN_MASK_LEN_STORE);
+         bias_idx = internal_fn_len_index (IFN_MASK_LEN_STORE) + 1;
        }
       icode = convert_optab_handler (optab, mode, mask_mode);
-      bias_no = 4;
     }
 
   if (icode != CODE_FOR_nothing)
     {
       /* For now we only support biases of 0 or -1.  Try both of them.  */
-      if (insn_operand_matches (icode, bias_no, GEN_INT (0)))
+      if (insn_operand_matches (icode, bias_idx, GEN_INT (0)))
        return 0;
-      if (insn_operand_matches (icode, bias_no, GEN_INT (-1)))
+      if (insn_operand_matches (icode, bias_idx, GEN_INT (-1)))
        return -1;
     }
 
diff --git a/gcc/optabs-tree.cc b/gcc/optabs-tree.cc
index 0de74c7966af..53788b9259bd 100644
--- a/gcc/optabs-tree.cc
+++ b/gcc/optabs-tree.cc
@@ -615,28 +615,27 @@ target_supports_len_load_store_p (machine_mode mode, bool is_load,
 {
   optab op = is_load ? len_load_optab : len_store_optab;
   optab masked_op = is_load ? mask_len_load_optab : mask_len_store_optab;
+  internal_fn which_ifn;
 
-  if (direct_optab_handler (op, mode))
+  enum insn_code icode;
+  if ((icode = direct_optab_handler (op, mode)) != CODE_FOR_nothing)
     {
-      if (ifn)
-       *ifn = is_load ? IFN_LEN_LOAD : IFN_LEN_STORE;
-      return true;
+      which_ifn = is_load ? IFN_LEN_LOAD : IFN_LEN_STORE;
     }
   machine_mode mask_mode;
-  enum insn_code icode;
-  if (targetm.vectorize.get_mask_mode (mode).exists (&mask_mode)
+  if (!icode
+      && targetm.vectorize.get_mask_mode (mode).exists (&mask_mode)
       && ((icode = convert_optab_handler (masked_op, mode, mask_mode))
          != CODE_FOR_nothing))
-    {
-      if (ifn)
-       *ifn = is_load ? IFN_MASK_LEN_LOAD : IFN_MASK_LEN_STORE;
-      if (elsvals && is_load)
-       get_supported_else_vals (icode,
-                                internal_fn_else_index (IFN_MASK_LEN_LOAD),
-                                *elsvals);
-      return true;
-    }
-  return false;
+    which_ifn = is_load ? IFN_MASK_LEN_LOAD : IFN_MASK_LEN_STORE;
+
+  if (icode && elsvals && is_load)
+    get_supported_else_vals (icode, internal_fn_else_index (which_ifn),
+                            *elsvals);
+
+  if (icode && ifn)
+    *ifn = which_ifn;
+  return icode;
 }
 
 /* If target supports vector load/store with length for vector mode MODE,
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 95f015f92980..56ff1c846d1e 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -11392,10 +11392,18 @@ vectorizable_load (vec_info *vinfo,
              {
                tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
                gcall *call;
+
+               /* Need conversion if the vectype is punned by VnQI.  */
+               els_vectype = vectype;
+               if (vmode != new_vmode)
+                 els_vectype
+                   = build_vector_type_for_mode (unsigned_intQI_type_node,
+                                                 new_vmode);
+               vec_els = vect_get_mask_load_else (maskload_elsval,
+                                                  els_vectype);
+
                if (partial_ifn == IFN_MASK_LEN_LOAD)
                  {
-                   vec_els = vect_get_mask_load_else (maskload_elsval,
-                                                      vectype);
                    if (type_mode_padding_p
                        && maskload_elsval != MASK_LOAD_ELSE_ZERO)
                      need_zeroing = true;
@@ -11405,9 +11413,10 @@ vectorizable_load (vec_info *vinfo,
                                                       final_len, bias);
                  }
                else
-                 call = gimple_build_call_internal (IFN_LEN_LOAD, 4,
+                 call = gimple_build_call_internal (IFN_LEN_LOAD, 5,
                                                     dataref_ptr, ptr,
-                                                    final_len, bias);
+                                                    vec_els, final_len,
+                                                    bias);
                gimple_call_set_nothrow (call, true);
                new_stmt = call;
                data_ref = NULL_TREE;

Reply via email to