----- Am 18. Aug 2025 um 9:33 schrieb Jørgen Kvalsvik j...@lambda.is:

> On 8/18/25 02:58, Sebastian Huber wrote:
>> Hello,
>> 
>> I have question to the counters used for the condition coverage 
>> implementation
>> in tree-profile.cc
>> 
>> /* Stores the incoming edge and previous counters (in SSA form) on that edge
>>     for the node e->deston that edge for the node e->dest.  The counters 
>> record
>>     the seen-true (0), seen-false (1), and current-mask (2).  They are 
>> stored in
>>     an array rather than proper members for access-by-index as the code paths
>>     tend to be identical for the different counters.  */
>> struct counters
>> {
>>      edge e;
>>      tree counter[3];
>>      tree& operator [] (size_t i) { return counter[i]; }
>> };
>> 
>> While working on the -fprofile-update=atomic support for 32-bit targets which
>> lack support for 64-bit atomic operations, I noticed that some atomic
>> no-operations are generated for the instrumented code
>> (https://gcc.gnu.org/pipermail/gcc-patches/2025-August/692555.html). For
>> example:
>> 
>> int a(int i);
>> int b(int i);
>> 
>> int f(int i)
>> {
>>      if (i) {
>>        return a(i);
>>      } else {
>>        return b(i);
>>      }
>> }
>> 
>> gcc -O2 -fprofile-update=atomic -fcondition-coverage -S -o - test.c
>> -fdump-tree-all
>> 
>> ;; Function f (f, funcdef_no=0, decl_uid=4621, cgraph_uid=1, symbol_order=0)
>> 
>> int f (int i)
>> {
>>    int _1;
>>    int _6;
>>    int _8;
>> 
>>    <bb 2> [local count: 1073741824]:
>>    if (i_3(D) != 0)
>>      goto <bb 3>; [50.00%]
>>    else
>>      goto <bb 4>; [50.00%]
>> 
>>    <bb 3> [local count: 536870912]:
>>    __atomic_fetch_or_8 (&__gcov8.f[0], 1, 0);
>>    __atomic_fetch_or_8 (&__gcov8.f[1], 0, 0);
>>    _8 = a (i_3(D)); [tail call]
>>    goto <bb 5>; [100.00%]
>> 
>>    <bb 4> [local count: 536870912]:
>>    __atomic_fetch_or_8 (&__gcov8.f[0], 0, 0);
>>    __atomic_fetch_or_8 (&__gcov8.f[1], 1, 0);
>>    _6 = b (0); [tail call]
>> 
>>    <bb 5> [local count: 1073741824]:
>>    # _1 = PHI <_8(3), _6(4)>
>>    return _1;
>> 
>> }
>> 
>> The __atomic_fetch_or_8 (&__gcov8.f[1], 0, 0) and __atomic_fetch_or_8
>> (&__gcov8.f[0], 0, 0) could be optimized away. Since GCC is able to figure 
>> out
>> that the masks are compile-time constants wouldn't it be possible to use a
>> simple uint64_t for the current-mask (2) in struct counters?
> 
> Is this something you're seeing consistently, even when the number of
> conditions go up?

I only looked at simple test cases while working on the support for splitting 
up the atomic operations. In a slightly more complicated test function

int a(int i);
int b(int i);
int c(int i);

int f(int i, int j, int k, int l, int m)
{
  if (i && j && (k || !l || m)) {
    if (a(i) || b(i) || l) {
      return a(i);
    } {
      return c(i);
    }
  } else if (k && m) {
    return b(i);
  } else {
    return 123;
  }
}

all the masks are still compile-time constants:

;; Function f (f, funcdef_no=0, decl_uid=4627, cgraph_uid=1, symbol_order=0)

Removing basic block 20
int f (int i, int j, int k, int l, int m)
{
  _Bool _1;
  _Bool _2;
  _Bool _3;
  _Bool _5;
  _Bool _6;
  int _7;
  int _8;
  _Bool _10;
  _Bool _11;
  int _12;
  int _24;
  int _26;
  int _28;
  _Bool _76;

  <bb 2> [local count: 1073741822]:
  _1 = i_15(D) != 0;
  _2 = j_16(D) != 0;
  _3 = _1 & _2;
  _76 = k_17(D) != 0;
  if (_3 != 0)
    goto <bb 3>; [50.00%]
  else
    goto <bb 16>; [50.00%]

  <bb 3> [local count: 536870911]:
  _5 = l_18(D) == 0;
  _6 = _5 | _76;
  if (_6 != 0)
    goto <bb 4>; [33.00%]
  else
    goto <bb 5>; [67.00%]

  <bb 4> [local count: 177167400]:
  __atomic_fetch_or_8 (&__gcov8.f[0], 3, 0);
  __atomic_fetch_or_8 (&__gcov8.f[1], 0, 0);
  goto <bb 8>; [100.00%]

  <bb 5> [local count: 359703512]:
  if (m_19(D) != 0)
    goto <bb 7>; [50.00%]
  else
    goto <bb 6>; [50.00%]

  <bb 6> [local count: 179851756]:
  __atomic_fetch_or_8 (&__gcov8.f[0], 0, 0);
  __atomic_fetch_or_8 (&__gcov8.f[1], 6, 0);
  goto <bb 17>; [100.00%]

  <bb 7> [local count: 179851756]:
  __atomic_fetch_or_8 (&__gcov8.f[0], 5, 0);
  __atomic_fetch_or_8 (&__gcov8.f[1], 0, 0);

  <bb 8> [local count: 357019156]:
  _7 = a (i_15(D));
  if (_7 != 0)
    goto <bb 9>; [34.00%]
  else
    goto <bb 10>; [66.00%]

  <bb 9> [local count: 121386514]:
  __atomic_fetch_or_8 (&__gcov8.f[2], 1, 0);
  __atomic_fetch_or_8 (&__gcov8.f[3], 0, 0);
  goto <bb 14>; [100.00%]

  <bb 10> [local count: 235632641]:
  _8 = b (i_15(D));
  if (_8 != 0)
    goto <bb 11>; [34.00%]
  else
    goto <bb 12>; [66.00%]

  <bb 11> [local count: 80115099]:
  __atomic_fetch_or_8 (&__gcov8.f[2], 2, 0);
  __atomic_fetch_or_8 (&__gcov8.f[3], 0, 0);
  goto <bb 14>; [100.00%]

  <bb 12> [local count: 155517543]:
  if (l_18(D) != 0)
    goto <bb 13>; [67.00%]
  else
    goto <bb 15>; [33.00%]

  <bb 13> [local count: 104196754]:
  __atomic_fetch_or_8 (&__gcov8.f[2], 4, 0);
  __atomic_fetch_or_8 (&__gcov8.f[3], 0, 0);

  <bb 14> [local count: 305698367]:
  _26 = a (i_15(D)); [tail call]
  goto <bb 19>; [100.00%]

  <bb 15> [local count: 51320789]:
  __atomic_fetch_or_8 (&__gcov8.f[2], 0, 0);
  __atomic_fetch_or_8 (&__gcov8.f[3], 7, 0);
  _24 = c (i_15(D)); [tail call]
  goto <bb 19>; [100.00%]

  <bb 16> [local count: 536870911]:
  __atomic_fetch_or_8 (&__gcov8.f[0], 0, 0);
  __atomic_fetch_or_8 (&__gcov8.f[1], 1, 0);
  _10 = m_19(D) != 0;
  _11 = _10 & _76;
  if (_11 != 0)
    goto <bb 18>; [63.77%]
  else
    goto <bb 17>; [36.23%]

  <bb 17> [local count: 374344247]:
  __atomic_fetch_or_8 (&__gcov8.f[4], 0, 0);
  __atomic_fetch_or_8 (&__gcov8.f[5], 1, 0);
  goto <bb 19>; [100.00%]

  <bb 18> [local count: 342378420]:
  __atomic_fetch_or_8 (&__gcov8.f[4], 1, 0);
  __atomic_fetch_or_8 (&__gcov8.f[5], 0, 0);
  _28 = b (i_15(D)); [tail call]

  <bb 19> [local count: 1073741824]:
  # _12 = PHI <_26(14), _24(15), _28(18), 123(17)>
  return _12;

}

> 
> I'm sure it's possible to optimize out this case by checking if the
> current-mask still is the initial zero constant. The inputs that
> determine the current-mask are constant and tied to the node, but the
> final state of current-mask once the counters are flushed depends on the
> path taken. I'll try to think a bit more about it, but I don't think it
> can be replaced by a uint64_t without a redesign of the
> instrument_decisions function.

Ok, thanks for having a look at it.

-- 
embedded brains GmbH & Co. KG
Herr Sebastian HUBER
Dornierstr. 4
82178 Puchheim
Germany
email: sebastian.hu...@embedded-brains.de
phone: +49-89-18 94 741 - 16
fax:   +49-89-18 94 741 - 08

Registergericht: Amtsgericht München
Registernummer: HRB 157899
Vertretungsberechtigte Geschäftsführer: Peter Rasmussen, Thomas Dörfler
Unsere Datenschutzerklärung finden Sie hier:
https://embedded-brains.de/datenschutzerklaerung/

Reply via email to