Hi, I am new to this, and I would really appreciate your advice.

I noticed PR71716, and I want to enable the ATOMIC_COMPARE_EXCHANGE
internal-fn optimization for floating-point types and for types that
contain padding bits (e.g., long double). Please correct me if I have
made any mistakes. Thanks!

Firstly, regarding the concern about sNaN float/double values, it seems
to work well and should already be covered by
testsuite/gcc.dg/atomic/c11-atomic-exec-5.c.
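
For context, the kind of source pattern that ends up going through this
path once floating types are allowed is a compare-exchange on a
float/double whose expected variable is addressable only because its
address is passed to the call. A minimal hand-written illustration (an
assumed example, not the code from that test):

  #include <stdatomic.h>
  #include <stdbool.h>

  _Atomic double v;

  /* "expected" is addressable only because its address is passed to the
     built-in, which is the situation the optimization requires.  */
  bool
  try_update (double desired)
  {
    double expected = 0.0;
    return atomic_compare_exchange_strong (&v, &expected, desired);
  }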

Secondly, since ATOMIC_COMPARE_EXCHANGE is only enabled when the
expected var is addressable only because of the call, its padding bits
cannot be modified by any other stmts. So we can save all the bits
after the ATOMIC_COMPARE_EXCHANGE call and extract the padding bits
from them. After the first iteration, the extracted padding bits can be
mixed back into the expected var.
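
In plain C, the idea is roughly the following sketch (illustrative
only: the names allbits/maskbits mirror the patch, while unsigned
__int128 and memcpy merely stand in for the integer mode and
VIEW_CONVERT_EXPR; it assumes x86-64 long double with TYPE_PRECISION 80
stored in a 16-byte slot, and the 16-byte compare-exchange itself would
need -mcx16 or libatomic):

  #include <stdbool.h>
  #include <string.h>

  typedef unsigned __int128 u128;

  static u128 allbits;  /* all bits seen by the last CAS, incl. padding */

  bool
  cas_long_double (long double *mem, long double *expected,
                   long double desired)
  {
    u128 iexp, ides;
    memcpy (&iexp, expected, sizeof iexp);  /* VIEW_CONVERT_EXPR<itype>(expected) */
    memcpy (&ides, &desired, sizeof ides);

    /* maskbits = (1 << TYPE_PRECISION (etype)) - 1: the value bits.  */
    u128 maskbits = (((u128) 1) << 80) - 1;
    /* Splice the previously observed padding bits into the expected value;
       allbits starts out as 0, so the first iteration uses zero padding.  */
    iexp = (iexp & maskbits) | (allbits & ~maskbits);

    bool ok = __atomic_compare_exchange_n ((u128 *) mem, &iexp, ides, false,
                                           __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);

    /* Save all the bits (value + padding) the CAS saw, for the next round.  */
    allbits = iexp;
    memcpy (expected, &iexp, sizeof iexp);  /* convert back to long double */
    return ok;
  }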

Bootstrapped/regtested on x86_64-linux.

I did some benchmarks, and there is a significant speedup for
float/double types, while there is no regression for the long double
type.

Thanks,

xndcn


gcc/ChangeLog:

        * gimple-fold.cc (optimize_atomic_compare_exchange_p): Also allow
        SCALAR_FLOAT_TYPE_P expected vars, and types whose TYPE_PRECISION
        differs from the mode's bitsize.
        (fold_builtin_atomic_compare_exchange): If TYPE_PRECISION differs
        from the mode's bitsize, keep track of all the bits and mix them
        with VIEW_CONVERT_EXPR<itype>(expected).

Signed-off-by: xndcn <xnd...@gmail.com>
---
 gcc/gimple-fold.cc | 77 ++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 71 insertions(+), 6 deletions(-)

diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
index cb4b57250..321ff4f41 100644
--- a/gcc/gimple-fold.cc
+++ b/gcc/gimple-fold.cc
@@ -5306,12 +5306,7 @@ optimize_atomic_compare_exchange_p (gimple *stmt)
       || !auto_var_in_fn_p (TREE_OPERAND (expected, 0), current_function_decl)
       || TREE_THIS_VOLATILE (etype)
       || VECTOR_TYPE_P (etype)
-      || TREE_CODE (etype) == COMPLEX_TYPE
-      /* Don't optimize floating point expected vars, VIEW_CONVERT_EXPRs
-        might not preserve all the bits.  See PR71716.  */
-      || SCALAR_FLOAT_TYPE_P (etype)
-      || maybe_ne (TYPE_PRECISION (etype),
-                  GET_MODE_BITSIZE (TYPE_MODE (etype))))
+      || TREE_CODE (etype) == COMPLEX_TYPE)
     return false;

   tree weak = gimple_call_arg (stmt, 3);
@@ -5350,8 +5345,10 @@ fold_builtin_atomic_compare_exchange (gimple_stmt_iterator *gsi)
   tree itype = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (parmt)));
   tree ctype = build_complex_type (itype);
   tree expected = TREE_OPERAND (gimple_call_arg (stmt, 1), 0);
+  tree etype = TREE_TYPE (expected);
   bool throws = false;
   edge e = NULL;
+  tree allbits = NULL_TREE;
   gimple *g = gimple_build_assign (make_ssa_name (TREE_TYPE (expected)),
                                   expected);
   gsi_insert_before (gsi, g, GSI_SAME_STMT);
@@ -5362,6 +5359,67 @@ fold_builtin_atomic_compare_exchange (gimple_stmt_iterator *gsi)
                               build1 (VIEW_CONVERT_EXPR, itype,
                                       gimple_assign_lhs (g)));
       gsi_insert_before (gsi, g, GSI_SAME_STMT);
+
+      // VIEW_CONVERT_EXPRs might not preserve all the bits.  See PR71716.
+      // So we have to keep track of all the bits here.
+      if (maybe_ne (TYPE_PRECISION (etype),
+                   GET_MODE_BITSIZE (TYPE_MODE (etype))))
+       {
+         gimple_stmt_iterator cgsi
+           = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
+         allbits = create_tmp_var (itype);
+         // allbits is initialized to 0, which can be ignored the first time
+         gimple *init_stmt
+           = gimple_build_assign (allbits, build_int_cst (itype, 0));
+         gsi_insert_before (&cgsi, init_stmt, GSI_SAME_STMT);
+         tree maskbits = create_tmp_var (itype);
+         // maskbits is initialized to all ones (0xFFF...)
+         init_stmt = gimple_build_assign (maskbits, build1 (BIT_NOT_EXPR,
+                                                            itype, allbits));
+         gsi_insert_before (&cgsi, init_stmt, GSI_SAME_STMT);
+
+         // g = g & maskbits
+         g = gimple_build_assign (make_ssa_name (itype),
+                                  build2 (BIT_AND_EXPR, itype,
+                                          gimple_assign_lhs (g), maskbits));
+         gsi_insert_before (gsi, g, GSI_SAME_STMT);
+
+         gimple *def_mask = gimple_build_assign (
+           make_ssa_name (itype),
+           build2 (LSHIFT_EXPR, itype, build_int_cst (itype, 1),
+                   build_int_cst (itype, TYPE_PRECISION (etype))));
+         gsi_insert_before (gsi, def_mask, GSI_SAME_STMT);
+         def_mask = gimple_build_assign (make_ssa_name (itype),
+                                         build2 (MINUS_EXPR, itype,
+                                                 gimple_assign_lhs (def_mask),
+                                                 build_int_cst (itype, 1)));
+         gsi_insert_before (gsi, def_mask, GSI_SAME_STMT);
+         // maskbits = (1 << TYPE_PRECISION (etype)) - 1
+         def_mask = gimple_build_assign (maskbits, SSA_NAME,
+                                         gimple_assign_lhs (def_mask));
+         gsi_insert_before (gsi, def_mask, GSI_SAME_STMT);
+
+         // paddingbits = (~maskbits) & allbits
+         def_mask
+           = gimple_build_assign (make_ssa_name (itype),
+                                  build1 (BIT_NOT_EXPR, itype,
+                                          gimple_assign_lhs (def_mask)));
+         gsi_insert_before (gsi, def_mask, GSI_SAME_STMT);
+         def_mask
+           = gimple_build_assign (make_ssa_name (itype),
+                                  build2 (BIT_AND_EXPR, itype, allbits,
+                                          gimple_assign_lhs (def_mask)));
+         gsi_insert_before (gsi, def_mask, GSI_SAME_STMT);
+
+         // g = g | paddingbits, i.e.,
+         // g = (VIEW_CONVERT_EXPR<itype>(expected) & maskbits)
+         //       | (allbits & (~maskbits))
+         g = gimple_build_assign (make_ssa_name (itype),
+                                  build2 (BIT_IOR_EXPR, itype,
+                                          gimple_assign_lhs (g),
+                                          gimple_assign_lhs (def_mask)));
+         gsi_insert_before (gsi, g, GSI_SAME_STMT);
+       }
     }
   int flag = (integer_onep (gimple_call_arg (stmt, 3)) ? 256 : 0)
             + int_size_in_bytes (itype);
@@ -5410,6 +5468,13 @@ fold_builtin_atomic_compare_exchange (gimple_stmt_iterator *gsi)
     gsi_insert_after (gsi, g, GSI_NEW_STMT);
   if (!useless_type_conversion_p (TREE_TYPE (expected), itype))
     {
+      // Save all the bits here.
+      if (maybe_ne (TYPE_PRECISION (etype),
+                   GET_MODE_BITSIZE (TYPE_MODE (etype))))
+       {
+         g = gimple_build_assign (allbits, SSA_NAME, gimple_assign_lhs (g));
+         gsi_insert_after (gsi, g, GSI_NEW_STMT);
+       }
       g = gimple_build_assign (make_ssa_name (TREE_TYPE (expected)),
                               VIEW_CONVERT_EXPR,
                               build1 (VIEW_CONVERT_EXPR, TREE_TYPE (expected),
-- 
2.25.1
