https://gcc.gnu.org/g:656db31e4448e7b51a919dc1acfb3080c82f43de

commit r16-574-g656db31e4448e7b51a919dc1acfb3080c82f43de
Author: Pan Li <pan2...@intel.com>
Date:   Mon Apr 28 20:35:08 2025 +0800

    Match: Support form 7 for unsigned integer SAT_ADD
    
    This patch adds support for form 7 of the unsigned integer
    SAT_ADD, as shown in the example below.
    
      #define DEF_SAT_U_ADD_FMT_7(WT, T)     \
      T __attribute__((noinline))            \
      sat_u_add_##WT##_##T##_fmt_7(T x, T y) \
      {                                      \
        T max = -1;                          \
        WT val = (WT)x + (WT)y;              \
        return val > max ? max : (T)val;     \
      }
    
      DEF_SAT_U_ADD_FMT_7(uint64_t, uint32_t)
    
    If we build with -O3 and -fdump-tree-optimized, we get the following.
    
    Before this patch:
       5   │ __attribute__((noinline))
       6   │ uint32_t sat_u_add_uint64_t_uint32_t_fmt_7 (uint32_t x, uint32_t y)
       7   │ {
       8   │   uint64_t val;
       9   │   long unsigned int _1;
      10   │   long unsigned int _2;
      11   │   uint32_t _3;
      12   │   uint32_t _7;
      13   │
      14   │   <bb 2> [local count: 1073741824]:
      15   │   _1 = (long unsigned int) x_4(D);
      16   │   _2 = (long unsigned int) y_5(D);
      17   │   val_6 = _1 + _2;
      18   │   if (val_6 <= 4294967295)
      19   │     goto <bb 3>; [65.00%]
      20   │   else
      21   │     goto <bb 4>; [35.00%]
      22   │
      23   │   <bb 3> [local count: 697932184]:
      24   │   _7 = x_4(D) + y_5(D);
      25   │
      26   │   <bb 4> [local count: 1073741824]:
      27   │   # _3 = PHI <4294967295(2), _7(3)>
      28   │   return _3;
      29   │
      30   │ }
    
    After this patch:
       4   │ __attribute__((noinline))
       5   │ uint32_t sat_u_add_uint64_t_uint32_t_fmt_7 (uint32_t x, uint32_t y)
       6   │ {
       7   │   uint32_t _3;
       8   │
       9   │   <bb 2> [local count: 1073741824]:
      10   │   _3 = .SAT_ADD (x_4(D), y_5(D)); [tail call]
      11   │   return _3;
      12   │
      13   │ }
    
    This change also takes effect for vector modes.
    
    The following test suites passed with this patch.
    * The rv64gcv full regression test.
    * The x86 bootstrap test.
    * The x86 full regression test.
    
    gcc/ChangeLog:
    
            * match.pd: Add form 7 matching pattern for unsigned integer
            SAT_ADD.
    
    Signed-off-by: Pan Li <pan2...@intel.com>

Diff:
---
 gcc/match.pd | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/gcc/match.pd b/gcc/match.pd
index ab496d923cc0..f4050687647b 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3242,7 +3242,21 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
      SAT_U_ADD = IMAGPART (SUM) != 0 ? -1 : REALPART (SUM)  */
   (cond^ (ne (imagpart (IFN_ADD_OVERFLOW@2 @0 INTEGER_CST@1)) integer_zerop)
         integer_minus_onep (realpart @2))
-  (if (types_match (type, @0) && int_fits_type_p (@1, type)))))
+  (if (types_match (type, @0) && int_fits_type_p (@1, type))))
+ (match (unsigned_integer_sat_add @0 @1)
+  /* WIDEN_SUM = (WT)X + (WT)Y
+     SAT_U_ADD = WIDEN_SUM > MAX ? MAX : (NT)WIDEN_SUM  */
+  (cond^ (le (plus (convert@2 @0) (convert@3 @1)) INTEGER_CST@4)
+        (plus:c @0 @1) integer_minus_onep)
+  (if (types_match (type, @0, @1) && types_match (@2, @3))
+   (with
+    {
+     unsigned precision = TYPE_PRECISION (type);
+     unsigned widen_precision = TYPE_PRECISION (TREE_TYPE (@2));
+     wide_int max = wi::mask (precision, false, widen_precision);
+     wide_int c4 = wi::to_wide (@4);
+    }
+    (if (wi::eq_p (c4, max) && widen_precision > precision))))))
 
 /* Saturation sub for unsigned integer.  */
 (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type))

Reply via email to