Hi,

The following patch implements the vectorization logic for FLOOR_MOD_EXPR and 
FLOOR_DIV_EXPR. According to the logic mentioned in the PR, we have 
For signed operands,

    r = x %[fl] y;

    is

    r = x % y; if (r && (x ^ y) < 0) r += y;

    and

    d = x /[fl] y;

    is

    r = x % y; d = x / y; if (r && (x ^ y) < 0) --d;

The first part enables FLOOR_{MOD,DIV}_EXPR in the switch case. Since the second
operand is always a constant (check done at line 4875), we determine whether the
operation is effectively unsigned by checking that the first operand has an
unsigned type and the second operand is greater than zero. If so, FLOOR_{DIV,MOD}
is the same as TRUNC_{DIV,MOD}.

For signed operands, the logic written above is implemented right after the
TRUNC_MOD_EXPR computation ends, since the remainder is needed in both cases.
The pseudo code for the vector implementation is as follows (op0 and op1 are the
operands, and r is the remainder, r = op0 % op1):

v1 = op0 ^ op1 
v2 = (v1 < 0)
v3 = (r!=0)
v4 = (v3 && v2)

// For FLOOR_MOD_EXPR
result = r + (v4 ? op1 : 0)
// For FLOOR_DIV_EXPR 
result = d - (v4 ? 1 : 0)

Please let me know if this logic is fine. Also I needed some more inputs on 
this.
1. In the if (integer_pow2p (oprnd1)) path, are there any recommendations, or
would it be fine to produce this same code? For correctness, I have skipped the
handling there for now.
2. I can have a test case for FLOOR_MOD_EXPR using the modulo intrinsic 
procedure. 
Since the PR talks about FLOOR_DIV_EXPR and {CEIL,ROUND}_{MOD,DIV}_EXPR, could 
someone please suggest source code that can have these operators in the gimple? 
I am not sure how to feed gcc just the updated gimple (I assume this is not 
possible currently).


        PR tree-optimization/119702

gcc/ChangeLog:

        * tree-vect-patterns.cc (vect_recog_divmod_pattern): Add
        vectorization logic for FLOOR_MOD_EXPR and FLOOR_DIV_EXPR.
---
 gcc/tree-vect-patterns.cc | 82 +++++++++++++++++++++++++++++++++++++--
 1 file changed, 78 insertions(+), 4 deletions(-)

diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
index b39da1062c0..c5e3c758ef0 100644
--- a/gcc/tree-vect-patterns.cc
+++ b/gcc/tree-vect-patterns.cc
@@ -4862,6 +4862,8 @@ vect_recog_divmod_pattern (vec_info *vinfo,
     case TRUNC_DIV_EXPR:
     case EXACT_DIV_EXPR:
     case TRUNC_MOD_EXPR:
+    case FLOOR_MOD_EXPR:
+    case FLOOR_DIV_EXPR:
       break;
     default:
       return NULL;
@@ -4881,6 +4883,23 @@ vect_recog_divmod_pattern (vec_info *vinfo,
   if (vectype == NULL_TREE)
     return NULL;
 
+  bool unsignedp = TYPE_UNSIGNED (itype) && (tree_int_cst_sgn (oprnd1) > 0);
+
+  if (unsignedp) 
+  {
+    switch (rhs_code) 
+    {
+      case FLOOR_DIV_EXPR:
+        rhs_code = TRUNC_DIV_EXPR;
+        break;
+      case FLOOR_MOD_EXPR:
+        rhs_code = TRUNC_MOD_EXPR;
+        break;
+      default:
+        break;
+    }
+  }
+
   if (optimize_bb_for_size_p (gimple_bb (last_stmt)))
     {
       /* If the target can handle vectorized division or modulo natively,
@@ -4893,7 +4912,9 @@ vect_recog_divmod_pattern (vec_info *vinfo,
     }
 
   prec = TYPE_PRECISION (itype);
-  if (integer_pow2p (oprnd1))
+  if (integer_pow2p (oprnd1) 
+      && rhs_code != FLOOR_DIV_EXPR 
+      && rhs_code != FLOOR_MOD_EXPR)
     {
       if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
        return NULL;
@@ -5315,13 +5336,15 @@ vect_recog_divmod_pattern (vec_info *vinfo,
        }
     }
 
-  if (rhs_code == TRUNC_MOD_EXPR)
+  if (rhs_code == TRUNC_MOD_EXPR 
+      || rhs_code == FLOOR_MOD_EXPR
+      || rhs_code == FLOOR_DIV_EXPR)
     {
       tree r, t1;
 
       /* We divided.  Now finish by:
-        t1 = q * oprnd1;
-        r = oprnd0 - t1;  */
+      t1 = q * oprnd1;
+      r = oprnd0 - t1;  */
       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
 
       t1 = vect_recog_temp_ssa_var (itype, NULL);
@@ -5330,6 +5353,57 @@ vect_recog_divmod_pattern (vec_info *vinfo,
 
       r = vect_recog_temp_ssa_var (itype, NULL);
       pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
+
+      if (rhs_code == FLOOR_MOD_EXPR
+          || rhs_code == FLOOR_DIV_EXPR) 
+      {
+        // r = x %[fl] y;
+        // is
+        // r = x % y; if (r && (x ^ y) < 0) r += y;
+        // Extract the sign bit
+        // x^y
+        append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
+        tree cond_reg = vect_recog_temp_ssa_var(itype, NULL);
+        def_stmt = gimple_build_assign(cond_reg, BIT_XOR_EXPR, oprnd0, oprnd1);
+        append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
+        
+        // x^y < 0
+        tree cond_reg2 = vect_recog_temp_ssa_var(boolean_type_node, NULL);
+        def_stmt = gimple_build_assign(cond_reg2, LT_EXPR, cond_reg, build_int_cst(itype, 0));
+        append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, truth_type_for(vectype), itype);
+
+        // r != 0
+        tree cond_reg3 = vect_recog_temp_ssa_var(boolean_type_node, NULL);
+        def_stmt = gimple_build_assign(cond_reg3, NE_EXPR, r, build_int_cst(itype, 0));
+        append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, truth_type_for(vectype), itype);
+
+        // x^y < 0 && r != 0
+        tree cond_reg4 = vect_recog_temp_ssa_var(boolean_type_node, NULL);
+        def_stmt = gimple_build_assign(cond_reg4, BIT_AND_EXPR, cond_reg3, cond_reg2);
+        append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, truth_type_for(vectype), itype);
+        if (rhs_code == FLOOR_MOD_EXPR) 
+        {
+          //  (x^y < 0 && r) ? y : 0
+          tree extr_cond = vect_recog_temp_ssa_var(itype, NULL);
+          def_stmt = gimple_build_assign(extr_cond, COND_EXPR, cond_reg4, oprnd1, build_int_cst(itype, 0));
+          append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
+          
+          // r += (x ^ y < 0 && r) ? y : 0
+          tree floor_mod_r = vect_recog_temp_ssa_var(itype, NULL);
+          pattern_stmt = gimple_build_assign(floor_mod_r, PLUS_EXPR, r, extr_cond);
+        } else if (rhs_code == FLOOR_DIV_EXPR)
+        {
+          //  (x^y < 0 && r) ? 1 : 0
+          tree extr_cond = vect_recog_temp_ssa_var(itype, NULL);
+          def_stmt = gimple_build_assign(extr_cond, COND_EXPR, cond_reg4, 
+            build_int_cst(itype, 1), build_int_cst(itype, 0));
+          append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
+          
+          // q -= (x ^ y < 0 && r) ? 1 : 0
+          tree floor_mod_r = vect_recog_temp_ssa_var(itype, NULL);
+          pattern_stmt = gimple_build_assign(floor_mod_r, MINUS_EXPR, q, extr_cond);
+        }
+      }
     }
 
   /* Pattern detected.  */
-- 
2.47.3

Reply via email to