https://gcc.gnu.org/g:b4f2fccf302bfe4ce704d11017b1a174eb3da89f

commit r15-4278-gb4f2fccf302bfe4ce704d11017b1a174eb3da89f
Author: Pan Li <pan2...@intel.com>
Date:   Fri Oct 11 11:51:52 2024 +0800

    Match: Support form 1 for vector signed integer SAT_SUB
    
    This patch adds support for form 1 of the vector signed integer
    SAT_SUB, as shown in the example below:
    
    Form 1:
      #define DEF_VEC_SAT_S_SUB_FMT_1(T, UT, MIN, MAX)                     \
      void __attribute__((noinline))                                       \
      vec_sat_s_sub_##T##_fmt_1 (T *out, T *op_1, T *op_2, unsigned limit) \
      {                                                                    \
        unsigned i;                                                        \
        for (i = 0; i < limit; i++)                                        \
          {                                                                \
            T x = op_1[i];                                                 \
            T y = op_2[i];                                                 \
            T minus = (UT)x - (UT)y;                                       \
            out[i] = (x ^ y) >= 0                                          \
              ? minus                                                      \
              : (minus ^ x) >= 0                                           \
                ? minus                                                    \
                : x < 0 ? MIN : MAX;                                       \
          }                                                                \
      }
    
    DEF_VEC_SAT_S_SUB_FMT_1(int8_t, uint8_t, INT8_MIN, INT8_MAX)
    
    Before this patch:
      91   │   _108 = .SELECT_VL (ivtmp_106, POLY_INT_CST [16, 16]);
      92   │   vect_x_16.11_80 = .MASK_LEN_LOAD (vectp_op_1.9_78, 8B, { -1, ... }, _108, 0);
      93   │   _69 = vect_x_16.11_80 >> 7;
      94   │   vect_x.12_81 = VIEW_CONVERT_EXPR<vector([16,16]) unsigned char>(vect_x_16.11_80);
      95   │   vect_y_18.15_85 = .MASK_LEN_LOAD (vectp_op_2.13_83, 8B, { -1, ... }, _108, 0);
      96   │   vect__7.21_91 = vect_x_16.11_80 ^ vect_y_18.15_85;
      97   │   mask__44.22_92 = vect__7.21_91 < { 0, ... };
      98   │   vect_y.16_86 = VIEW_CONVERT_EXPR<vector([16,16]) unsigned char>(vect_y_18.15_85);
      99   │   vect__6.17_87 = vect_x.12_81 - vect_y.16_86;
     100   │   vect_minus_19.18_88 = VIEW_CONVERT_EXPR<vector([16,16]) signed char>(vect__6.17_87);
     101   │   vect__8.19_89 = vect_x_16.11_80 ^ vect_minus_19.18_88;
     102   │   mask__42.20_90 = vect__8.19_89 < { 0, ... };
     103   │   mask__41.23_93 = mask__42.20_90 & mask__44.22_92;
     104   │   _4 = .COND_XOR (mask__41.23_93, _69, { 127, ... }, vect_minus_19.18_88);
     105   │   .MASK_LEN_STORE (vectp_out.31_102, 8B, { -1, ... }, _108, 0, _4);
     106   │   vectp_op_1.9_79 = vectp_op_1.9_78 + _108;
     107   │   vectp_op_2.13_84 = vectp_op_2.13_83 + _108;
     108   │   vectp_out.31_103 = vectp_out.31_102 + _108;
     109   │   ivtmp_107 = ivtmp_106 - _108;
    
    After this patch:
      81   │   _102 = .SELECT_VL (ivtmp_100, POLY_INT_CST [16, 16]);
      82   │   vect_x_16.11_89 = .MASK_LEN_LOAD (vectp_op_1.9_87, 8B, { -1, ... }, _102, 0);
      83   │   vect_y_18.14_93 = .MASK_LEN_LOAD (vectp_op_2.12_91, 8B, { -1, ... }, _102, 0);
      84   │   vect_patt_38.15_94 = .SAT_SUB (vect_x_16.11_89, vect_y_18.14_93);
      85   │   .MASK_LEN_STORE (vectp_out.16_96, 8B, { -1, ... }, _102, 0, vect_patt_38.15_94);
      86   │   vectp_op_1.9_88 = vectp_op_1.9_87 + _102;
      87   │   vectp_op_2.12_92 = vectp_op_2.12_91 + _102;
      88   │   vectp_out.16_97 = vectp_out.16_96 + _102;
      89   │   ivtmp_101 = ivtmp_100 - _102;
    
    The following test suites pass with this patch:
    * The rv64gcv full regression test.
    * The x86 bootstrap test.
    * The x86 full regression test.
    
    gcc/ChangeLog:
    
            * match.pd: Add case 1 matching pattern for vector signed SAT_SUB.
    
    Signed-off-by: Pan Li <pan2...@intel.com>

Diff:
---
 gcc/match.pd | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/gcc/match.pd b/gcc/match.pd
index 78084bb38582..f2b5f3af9ef6 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3401,6 +3401,22 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
  (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type)
       && types_match (type, @0, @1))))
 
+/* Signed saturation sub, case 4:
+   T minus = (T)((UT)X - (UT)Y);
+   SAT_S_SUB = (X ^ Y) < 0 & (X ^ minus) < 0 ? (-(T)(X < 0) ^ MAX) : minus;
+
+   The T and UT are type pair like T=int8_t, UT=uint8_t.  */
+(match (signed_integer_sat_sub @0 @1)
+ (cond^ (bit_and:c (lt (bit_xor @0 (nop_convert@2 (minus (nop_convert @0)
+                                                        (nop_convert @1))))
+                      integer_zerop)
+                  (lt (bit_xor:c @0 @1) integer_zerop))
+       (bit_xor:c (nop_convert (negate (nop_convert (convert
+                                                     (lt @0 integer_zerop)))))
+                  max_value)
+       @2)
+ (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type))))
+
 /* Unsigned saturation truncate, case 1, sizeof (WT) > sizeof (NT).
    SAT_U_TRUNC = (NT)x | (NT)(-(X > (WT)(NT)(-1))).  */
 (match (unsigned_integer_sat_trunc @0)

Reply via email to