The external library only provides the __atomic_fetch_{add,sub,and,or,xor,nand} routines, which fetch the value in memory before the operation is performed. It does not provide the alternate operation which fetches the value after the operation (it is trivial to calculate).

If the __atomic_{op}_fetch routine (which returns the value after the operation) cannot be inlined, it needs to be transformed into the fetch-before form, and have the appropriate calculation done to return the correct value.

ie, if

     c = __atomic_add_fetch (&b, a, __ATOMIC_RELAXED)

requires an external library call, it needs to be transformed into

     c = __atomic_fetch_add (&b, a, __ATOMIC_RELAXED) + a;

This patch takes care of that. It seems to work when I hacked up some test cases, but until I implement a flag to disable inlining all atomics, its pretty hard to test thoroughly.

I find the bit in expand_builtin_atomic_fetch_op which changes the function decl for the call a bit hacky maybe, but it seems to work. It looks like that is exactly the way a builtin is built up. (ie, I don't think the gcc_assert I added can fail, or previous calls to get_callee_fndecl() wouldn't have returned a value which got us to this hunk of code in the first place. Ie, the code there is reverse engineered from get_callee_fndecl() to find the function decl and change it. we know the built-in variations that are being changed are identical in parameters and everything else.

thoughts?

Anyway, it bootstraps and passes all tests on x86_64-unknown-linux-gnu.

Andrew

        * builtins.c (expand_builtin_atomic_fetch_op): External calls for
        'op_fetch' builtins need to instead call 'fetch_op' externals and issue 
        correction code.
        (expand_builtin): Provide proper builtin name for external call and
        ignored flag to expand_builtin_atomic_fetch_op.

Index: builtins.c
===================================================================
*** builtins.c  (revision 180460)
--- builtins.c  (working copy)
*************** expand_builtin_atomic_store (enum machin
*** 5386,5399 ****
     TARGET is an optional place for us to store the results.
     CODE is the operation, PLUS, MINUS, ADD, XOR, or IOR.
     FETCH_AFTER is true if returning the result of the operation.
!    FETCH_AFTER is false if returning the value before the operation.  */
  
  static rtx
  expand_builtin_atomic_fetch_op (enum machine_mode mode, tree exp, rtx target,
!                               enum rtx_code code, bool fetch_after)
  {
!   rtx val, mem;
    enum memmodel model;
  
    model = get_memmodel (CALL_EXPR_ARG (exp, 2));
  
--- 5386,5405 ----
     TARGET is an optional place for us to store the results.
     CODE is the operation, PLUS, MINUS, ADD, XOR, or IOR.
     FETCH_AFTER is true if returning the result of the operation.
!    FETCH_AFTER is false if returning the value before the operation.
!    IGNORE is true if the result is not used.
!    EXT_CALL is the correct builtin for an external call if this cannot be
!    resolved to an instriction sequence.  */
  
  static rtx
  expand_builtin_atomic_fetch_op (enum machine_mode mode, tree exp, rtx target,
!                               enum rtx_code code, bool fetch_after,
!                               bool ignore, enum built_in_function ext_call)
  {
!   rtx val, mem, ret;
    enum memmodel model;
+   tree fndecl;
+   tree addr;
  
    model = get_memmodel (CALL_EXPR_ARG (exp, 2));
  
*************** expand_builtin_atomic_fetch_op (enum mac
*** 5401,5407 ****
    mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode);
    val = expand_expr_force_mode (CALL_EXPR_ARG (exp, 1), mode);
  
!   return expand_atomic_fetch_op (target, mem, val, code, model, fetch_after);
  }
  
  /* Return true if size ARG is always lock free on this architecture.  */
--- 5407,5436 ----
    mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode);
    val = expand_expr_force_mode (CALL_EXPR_ARG (exp, 1), mode);
  
!   ret = expand_atomic_fetch_op (target, mem, val, code, model, fetch_after);
!   if (ret)
!     return ret;
! 
!   /* Return if a different routine isn't needed for the library call.  */
!   if (ext_call == BUILT_IN_NONE)
!     return NULL_RTX;
! 
!   /* Change the call to the specified function.  */
!   fndecl = get_callee_fndecl (exp);
!   addr = CALL_EXPR_FN (exp);
!   STRIP_NOPS (addr);
! 
!   gcc_assert (TREE_OPERAND (addr, 0) == fndecl);
!   TREE_OPERAND (addr, 0) = builtin_decl_explicit(ext_call);
! 
!   /* Expand the call here so we can emit trailing code.  */
!   ret = expand_call (exp, target, ignore);
! 
!   /* Then issue the arithmetic correction to return the right result.  */
!   if (!ignore)
!     ret = expand_simple_binop (mode, code, ret, val, NULL_RTX, true,
!                              OPTAB_LIB_WIDEN);
!   return ret;
  }
  
  /* Return true if size ARG is always lock free on this architecture.  */
*************** expand_builtin (tree exp, rtx target, rt
*** 6400,6474 ****
      case BUILT_IN_ATOMIC_ADD_FETCH_4:
      case BUILT_IN_ATOMIC_ADD_FETCH_8:
      case BUILT_IN_ATOMIC_ADD_FETCH_16:
!       mode = get_builtin_sync_mode (fcode - BUILT_IN_ATOMIC_ADD_FETCH_1);
!       target = expand_builtin_atomic_fetch_op (mode, exp, target, PLUS, true);
!       if (target)
!       return target;
!       break;
!  
      case BUILT_IN_ATOMIC_SUB_FETCH_1:
      case BUILT_IN_ATOMIC_SUB_FETCH_2:
      case BUILT_IN_ATOMIC_SUB_FETCH_4:
      case BUILT_IN_ATOMIC_SUB_FETCH_8:
      case BUILT_IN_ATOMIC_SUB_FETCH_16:
!       mode = get_builtin_sync_mode (fcode - BUILT_IN_ATOMIC_SUB_FETCH_1);
!       target = expand_builtin_atomic_fetch_op (mode, exp, target, MINUS, 
true);
!       if (target)
!       return target;
!       break;
!  
      case BUILT_IN_ATOMIC_AND_FETCH_1:
      case BUILT_IN_ATOMIC_AND_FETCH_2:
      case BUILT_IN_ATOMIC_AND_FETCH_4:
      case BUILT_IN_ATOMIC_AND_FETCH_8:
      case BUILT_IN_ATOMIC_AND_FETCH_16:
!       mode = get_builtin_sync_mode (fcode - BUILT_IN_ATOMIC_AND_FETCH_1);
!       target = expand_builtin_atomic_fetch_op (mode, exp, target, AND, true);
!       if (target)
!       return target;
!       break;
! 
      case BUILT_IN_ATOMIC_NAND_FETCH_1:
      case BUILT_IN_ATOMIC_NAND_FETCH_2:
      case BUILT_IN_ATOMIC_NAND_FETCH_4:
      case BUILT_IN_ATOMIC_NAND_FETCH_8:
      case BUILT_IN_ATOMIC_NAND_FETCH_16:
!       mode = get_builtin_sync_mode (fcode - BUILT_IN_ATOMIC_NAND_FETCH_1);
!       target = expand_builtin_atomic_fetch_op (mode, exp, target, NOT, true);
!       if (target)
!       return target;
!       break;
!  
      case BUILT_IN_ATOMIC_XOR_FETCH_1:
      case BUILT_IN_ATOMIC_XOR_FETCH_2:
      case BUILT_IN_ATOMIC_XOR_FETCH_4:
      case BUILT_IN_ATOMIC_XOR_FETCH_8:
      case BUILT_IN_ATOMIC_XOR_FETCH_16:
!       mode = get_builtin_sync_mode (fcode - BUILT_IN_ATOMIC_XOR_FETCH_1);
!       target = expand_builtin_atomic_fetch_op (mode, exp, target, XOR, true);
!       if (target)
!       return target;
!       break;
!  
      case BUILT_IN_ATOMIC_OR_FETCH_1:
      case BUILT_IN_ATOMIC_OR_FETCH_2:
      case BUILT_IN_ATOMIC_OR_FETCH_4:
      case BUILT_IN_ATOMIC_OR_FETCH_8:
      case BUILT_IN_ATOMIC_OR_FETCH_16:
!       mode = get_builtin_sync_mode (fcode - BUILT_IN_ATOMIC_OR_FETCH_1);
!       target = expand_builtin_atomic_fetch_op (mode, exp, target, IOR, true);
!       if (target)
!       return target;
!       break;
!  
! 
      case BUILT_IN_ATOMIC_FETCH_ADD_1:
      case BUILT_IN_ATOMIC_FETCH_ADD_2:
      case BUILT_IN_ATOMIC_FETCH_ADD_4:
      case BUILT_IN_ATOMIC_FETCH_ADD_8:
      case BUILT_IN_ATOMIC_FETCH_ADD_16:
        mode = get_builtin_sync_mode (fcode - BUILT_IN_ATOMIC_FETCH_ADD_1);
!       target = expand_builtin_atomic_fetch_op (mode, exp, target, PLUS, 
false);
        if (target)
        return target;
        break;
--- 6429,6533 ----
      case BUILT_IN_ATOMIC_ADD_FETCH_4:
      case BUILT_IN_ATOMIC_ADD_FETCH_8:
      case BUILT_IN_ATOMIC_ADD_FETCH_16:
!       {
!       enum built_in_function lib;
!       mode = get_builtin_sync_mode (fcode - BUILT_IN_ATOMIC_ADD_FETCH_1);
!       lib = (enum built_in_function)((int)BUILT_IN_ATOMIC_FETCH_ADD_1 + 
!                                      (fcode - BUILT_IN_ATOMIC_ADD_FETCH_1));
!       target = expand_builtin_atomic_fetch_op (mode, exp, target, PLUS, true,
!                                                ignore, lib);
!       if (target)
!         return target;
!       break;
!       }
      case BUILT_IN_ATOMIC_SUB_FETCH_1:
      case BUILT_IN_ATOMIC_SUB_FETCH_2:
      case BUILT_IN_ATOMIC_SUB_FETCH_4:
      case BUILT_IN_ATOMIC_SUB_FETCH_8:
      case BUILT_IN_ATOMIC_SUB_FETCH_16:
!       {
!       enum built_in_function lib;
!       mode = get_builtin_sync_mode (fcode - BUILT_IN_ATOMIC_SUB_FETCH_1);
!       lib = (enum built_in_function)((int)BUILT_IN_ATOMIC_FETCH_SUB_1 + 
!                                      (fcode - BUILT_IN_ATOMIC_SUB_FETCH_1));
!       target = expand_builtin_atomic_fetch_op (mode, exp, target, MINUS, true,
!                                                ignore, lib);
!       if (target)
!         return target;
!       break;
!       }
      case BUILT_IN_ATOMIC_AND_FETCH_1:
      case BUILT_IN_ATOMIC_AND_FETCH_2:
      case BUILT_IN_ATOMIC_AND_FETCH_4:
      case BUILT_IN_ATOMIC_AND_FETCH_8:
      case BUILT_IN_ATOMIC_AND_FETCH_16:
!       {
!       enum built_in_function lib;
!       mode = get_builtin_sync_mode (fcode - BUILT_IN_ATOMIC_AND_FETCH_1);
!       lib = (enum built_in_function)((int)BUILT_IN_ATOMIC_FETCH_AND_1 + 
!                                      (fcode - BUILT_IN_ATOMIC_AND_FETCH_1));
!       target = expand_builtin_atomic_fetch_op (mode, exp, target, AND, true,
!                                                ignore, lib);
!       if (target)
!         return target;
!       break;
!       }
      case BUILT_IN_ATOMIC_NAND_FETCH_1:
      case BUILT_IN_ATOMIC_NAND_FETCH_2:
      case BUILT_IN_ATOMIC_NAND_FETCH_4:
      case BUILT_IN_ATOMIC_NAND_FETCH_8:
      case BUILT_IN_ATOMIC_NAND_FETCH_16:
!       {
!       enum built_in_function lib;
!       mode = get_builtin_sync_mode (fcode - BUILT_IN_ATOMIC_NAND_FETCH_1);
!       lib = (enum built_in_function)((int)BUILT_IN_ATOMIC_FETCH_NAND_1 + 
!                                      (fcode - BUILT_IN_ATOMIC_NAND_FETCH_1));
!       target = expand_builtin_atomic_fetch_op (mode, exp, target, NOT, true,
!                                                ignore, lib);
!       if (target)
!         return target;
!       break;
!       }
      case BUILT_IN_ATOMIC_XOR_FETCH_1:
      case BUILT_IN_ATOMIC_XOR_FETCH_2:
      case BUILT_IN_ATOMIC_XOR_FETCH_4:
      case BUILT_IN_ATOMIC_XOR_FETCH_8:
      case BUILT_IN_ATOMIC_XOR_FETCH_16:
!       {
!       enum built_in_function lib;
!       mode = get_builtin_sync_mode (fcode - BUILT_IN_ATOMIC_XOR_FETCH_1);
!       lib = (enum built_in_function)((int)BUILT_IN_ATOMIC_FETCH_XOR_1 + 
!                                      (fcode - BUILT_IN_ATOMIC_XOR_FETCH_1));
!       target = expand_builtin_atomic_fetch_op (mode, exp, target, XOR, true,
!                                                ignore, lib);
!       if (target)
!         return target;
!       break;
!       }
      case BUILT_IN_ATOMIC_OR_FETCH_1:
      case BUILT_IN_ATOMIC_OR_FETCH_2:
      case BUILT_IN_ATOMIC_OR_FETCH_4:
      case BUILT_IN_ATOMIC_OR_FETCH_8:
      case BUILT_IN_ATOMIC_OR_FETCH_16:
!       {
!       enum built_in_function lib;
!       mode = get_builtin_sync_mode (fcode - BUILT_IN_ATOMIC_OR_FETCH_1);
!       lib = (enum built_in_function)((int)BUILT_IN_ATOMIC_FETCH_OR_1 + 
!                                      (fcode - BUILT_IN_ATOMIC_OR_FETCH_1));
!       target = expand_builtin_atomic_fetch_op (mode, exp, target, IOR, true,
!                                                ignore, lib);
!       if (target)
!         return target;
!       break;
!       }
      case BUILT_IN_ATOMIC_FETCH_ADD_1:
      case BUILT_IN_ATOMIC_FETCH_ADD_2:
      case BUILT_IN_ATOMIC_FETCH_ADD_4:
      case BUILT_IN_ATOMIC_FETCH_ADD_8:
      case BUILT_IN_ATOMIC_FETCH_ADD_16:
        mode = get_builtin_sync_mode (fcode - BUILT_IN_ATOMIC_FETCH_ADD_1);
!       target = expand_builtin_atomic_fetch_op (mode, exp, target, PLUS, false,
!                                              ignore, BUILT_IN_NONE);
        if (target)
        return target;
        break;
*************** expand_builtin (tree exp, rtx target, rt
*** 6479,6485 ****
      case BUILT_IN_ATOMIC_FETCH_SUB_8:
      case BUILT_IN_ATOMIC_FETCH_SUB_16:
        mode = get_builtin_sync_mode (fcode - BUILT_IN_ATOMIC_FETCH_SUB_1);
!       target = expand_builtin_atomic_fetch_op (mode, exp, target, MINUS, 
false);
        if (target)
        return target;
        break;
--- 6538,6545 ----
      case BUILT_IN_ATOMIC_FETCH_SUB_8:
      case BUILT_IN_ATOMIC_FETCH_SUB_16:
        mode = get_builtin_sync_mode (fcode - BUILT_IN_ATOMIC_FETCH_SUB_1);
!       target = expand_builtin_atomic_fetch_op (mode, exp, target, MINUS, 
false,
!                                              ignore, BUILT_IN_NONE);
        if (target)
        return target;
        break;
*************** expand_builtin (tree exp, rtx target, rt
*** 6490,6496 ****
      case BUILT_IN_ATOMIC_FETCH_AND_8:
      case BUILT_IN_ATOMIC_FETCH_AND_16:
        mode = get_builtin_sync_mode (fcode - BUILT_IN_ATOMIC_FETCH_AND_1);
!       target = expand_builtin_atomic_fetch_op (mode, exp, target, AND, false);
        if (target)
        return target;
        break;
--- 6550,6557 ----
      case BUILT_IN_ATOMIC_FETCH_AND_8:
      case BUILT_IN_ATOMIC_FETCH_AND_16:
        mode = get_builtin_sync_mode (fcode - BUILT_IN_ATOMIC_FETCH_AND_1);
!       target = expand_builtin_atomic_fetch_op (mode, exp, target, AND, false,
!                                              ignore, BUILT_IN_NONE);
        if (target)
        return target;
        break;
*************** expand_builtin (tree exp, rtx target, rt
*** 6501,6507 ****
      case BUILT_IN_ATOMIC_FETCH_NAND_8:
      case BUILT_IN_ATOMIC_FETCH_NAND_16:
        mode = get_builtin_sync_mode (fcode - BUILT_IN_ATOMIC_FETCH_NAND_1);
!       target = expand_builtin_atomic_fetch_op (mode, exp, target, NOT, false);
        if (target)
        return target;
        break;
--- 6562,6569 ----
      case BUILT_IN_ATOMIC_FETCH_NAND_8:
      case BUILT_IN_ATOMIC_FETCH_NAND_16:
        mode = get_builtin_sync_mode (fcode - BUILT_IN_ATOMIC_FETCH_NAND_1);
!       target = expand_builtin_atomic_fetch_op (mode, exp, target, NOT, false,
!                                              ignore, BUILT_IN_NONE);
        if (target)
        return target;
        break;
*************** expand_builtin (tree exp, rtx target, rt
*** 6512,6518 ****
      case BUILT_IN_ATOMIC_FETCH_XOR_8:
      case BUILT_IN_ATOMIC_FETCH_XOR_16:
        mode = get_builtin_sync_mode (fcode - BUILT_IN_ATOMIC_FETCH_XOR_1);
!       target = expand_builtin_atomic_fetch_op (mode, exp, target, XOR, false);
        if (target)
        return target;
        break;
--- 6574,6581 ----
      case BUILT_IN_ATOMIC_FETCH_XOR_8:
      case BUILT_IN_ATOMIC_FETCH_XOR_16:
        mode = get_builtin_sync_mode (fcode - BUILT_IN_ATOMIC_FETCH_XOR_1);
!       target = expand_builtin_atomic_fetch_op (mode, exp, target, XOR, false,
!                                              ignore, BUILT_IN_NONE);
        if (target)
        return target;
        break;
*************** expand_builtin (tree exp, rtx target, rt
*** 6523,6529 ****
      case BUILT_IN_ATOMIC_FETCH_OR_8:
      case BUILT_IN_ATOMIC_FETCH_OR_16:
        mode = get_builtin_sync_mode (fcode - BUILT_IN_ATOMIC_FETCH_OR_1);
!       target = expand_builtin_atomic_fetch_op (mode, exp, target, IOR, false);
        if (target)
        return target;
        break;
--- 6586,6593 ----
      case BUILT_IN_ATOMIC_FETCH_OR_8:
      case BUILT_IN_ATOMIC_FETCH_OR_16:
        mode = get_builtin_sync_mode (fcode - BUILT_IN_ATOMIC_FETCH_OR_1);
!       target = expand_builtin_atomic_fetch_op (mode, exp, target, IOR, false,
!                                              ignore, BUILT_IN_NONE);
        if (target)
        return target;
        break;

Reply via email to