On 22/05/15 15:28, Jason Merrill wrote:
> On 05/22/2015 09:55 AM, David Edelsohn wrote:
>> On Fri, May 22, 2015 at 9:40 AM, Jason Merrill <[email protected]> wrote:
>>> On 05/22/2015 07:23 AM, Ramana Radhakrishnan wrote:
>>>> +  /* Load the guard value only through an atomic acquire load.  */
>>>> +  guard = build_atomic_load (guard, MEMMODEL_ACQUIRE);
>>>> +
>>>>    /* Check to see if the GUARD is zero.  */
>>>>    guard = get_guard_bits (guard);
>>>
>>> I wonder if these calls should be reversed, to express that we're only
>>> trying to atomically load a byte (on non-ARM targets)?
>>
>> That expresses the semantics more directly, but will that lead to less
>> efficient code on some RISC architectures?
>
> I'm not sure.  I would expect that the target would use a larger load
> and mask it if that is better for performance, but I don't know.
>
> I do notice that get_guard_bits after build_atomic_load just won't work
> on non-ARM targets, as it ends up trying to take the address of a value.
>
> Jason
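To make the two orderings concrete, here is roughly what they correspond to in source terms, as a sketch using GCC's __atomic builtins rather than the internal tree builders (the `guard` variable and function names are mine, standing in for the ABI guard word):

    #include <cstdint>

    extern std::int64_t guard;   // stand-in for the Itanium-ABI guard variable

    // Patch as posted: acquire-load the whole guard, then extract its
    // first byte.  At tree level this makes get_guard_bits take the
    // address of the loaded *value*, which is the non-ARM breakage
    // noted above.
    bool initialized_whole_then_byte ()
    {
      std::int64_t g = __atomic_load_n (&guard, __ATOMIC_ACQUIRE);
      return *reinterpret_cast<unsigned char *> (&g) != 0;
    }

    // Reversed order: narrow to the guard's first byte first, then
    // acquire-load only that byte.
    bool initialized_byte_then_load ()
    {
      unsigned char *byte = reinterpret_cast<unsigned char *> (&guard);
      return __atomic_load_n (byte, __ATOMIC_ACQUIRE) != 0;
    }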
So on powerpc, where targetm.cxx.guard_mask_bit is false, this is what I see:
{
  static int * p;
  static int * p;

  if (<<cleanup_point (unsigned char) *(char *) &(long long int) __atomic_load_8 (&_ZGVZ1fvE1p, 2) == 0>>)
    {
      if (<<cleanup_point __cxa_guard_acquire (&_ZGVZ1fvE1p) != 0>>)
        {
          <<cleanup_point <<< Unknown tree: expr_stmt
            TARGET_EXPR <D.2846, 0>;, p = (int *) operator new (4);;, D.2846 = 1;;, __cxa_guard_release (&_ZGVZ1fvE1p); >>>>>;
        }
    }
  return <retval> = p;
}
with the following assembly output. David, is this what you would expect?
0: addis 2,12,.TOC.-0b@ha
addi 2,2,.TOC.-0b@l
.localentry _Z1fv,.-_Z1fv
mflr %r0
std %r30,-16(%r1)
std %r31,-8(%r1)
.cfi_register 65, 0
.cfi_offset 30, -16
.cfi_offset 31, -8
addis %r30,%r2,_ZGVZ1fvE1p@toc@ha
addi %r30,%r30,_ZGVZ1fvE1p@toc@l
std %r0,16(%r1)
stdu %r1,-64(%r1)
.cfi_def_cfa_offset 64
.cfi_offset 65, 16
addis %r10,%r2,_ZGVZ1fvE1p@toc@ha # gpr load fusion, type long
ld %r10,_ZGVZ1fvE1p@toc@l(%r10)
cmpw %cr7,%r10,%r10
bne- %cr7,$+4
isync
rlwinm %r9,%r10,0,0xff
std %r10,32(%r1)
cmpwi %cr7,%r9,0
beq %cr7,.L11
.L9:
addi %r1,%r1,64
.cfi_remember_state
.cfi_def_cfa_offset 0
addis %r3,%r2,_ZZ1fvE1p@toc@ha # gpr load fusion, type long
ld %r3,_ZZ1fvE1p@toc@l(%r3)
ld %r0,16(%r1)
ld %r30,-16(%r1)
ld %r31,-8(%r1)
mtlr %r0
.cfi_restore 65
.cfi_restore 31
.cfi_restore 30
blr
.p2align 4,,15
.L11:
.cfi_restore_state
mr %r3,%r30
And on AArch64, where targetm.cxx.guard_mask_bit is true:
{
  static int * p;
  static int * p;

  if (<<cleanup_point ((long long int) __atomic_load_8 (&_ZGVZ1fvE1p, 2) & 1) == 0>>)
    {
      if (<<cleanup_point __cxa_guard_acquire (&_ZGVZ1fvE1p) != 0>>)
        {
          <<cleanup_point <<< Unknown tree: expr_stmt
            TARGET_EXPR <D.3168, 0>;, p = (int *) operator new (4);;, D.3168 = 1;;, __cxa_guard_release (&_ZGVZ1fvE1p); >>>>>;
        }
    }
  return <retval> = p;
}
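That matches the guard_mask_bit shape, which in source terms is roughly the following sketch (again my own illustration, with the same assumed `guard` stand-in): the whole guard is acquire-loaded and bit 0 of the loaded value is tested, so nothing ever needs the address of the loaded value.

    #include <cstdint>

    extern std::int64_t guard;

    // guard_mask_bit form (ARM/AArch64): test bit 0 of the loaded
    // value directly instead of dereferencing its first byte.
    bool initialized_mask_bit ()
    {
      return (__atomic_load_n (&guard, __ATOMIC_ACQUIRE) & 1) != 0;
    }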
Alternatively, I can change this to an atomic load of just the byte required, but I'm not sure whether that's supported on all targets.
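For what it's worth, whether a 1-byte atomic load is natively lock-free on a given target can at least be probed with the lock-free builtins, along these lines:

    // Returns true when a 1-byte atomic access is lock-free on this
    // target; if false, a byte-sized __atomic_load_n would be expanded
    // as a libatomic call rather than a single inline load.
    bool byte_atomics_native ()
    {
      return __atomic_is_lock_free (sizeof (unsigned char), nullptr);
    }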
I'm going to be away shortly for the long weekend here, so I'll resume discussions/experiments on Tuesday.
regards
Ramana