https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95887

            Bug ID: 95887
           Summary: suboptimal memcmp with embedded zero bytes
           Product: gcc
           Version: 10.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: middle-end
          Assignee: unassigned at gcc dot gnu.org
          Reporter: msebor at gcc dot gnu.org
  Target Milestone: ---

Similar to pr95886, the memcmp expansion into compare-by-pieces is less than
optimal for sequences containing embedded null bytes.  For example, in the test
case below, the memcmp call in f() is expanded into what looks like a more
efficient sequence than the equivalent memcmp call in g().  The only difference
between the two is that the former compares against a sequence of non-zero
bytes while among the bytes compared by the latter is a null byte.  Clang emits
the same code for g() as GCC does for f().

I believe the root cause of the problem in both cases is working with
nul-terminated strings (using the result of c_getstr() without the size of what
it points to) instead of with arbitrary byte sequences.

$ cat z.c && gcc -O2 -S -Wall -fdump-tree-optimized=/dev/stdout -o/dev/stdout z.c
const char a[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
const char b[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };

int f (void *d)
{
  return __builtin_memcmp (d, a, 8);
}

int g (void *d)
{
  return __builtin_memcmp (d, b, 8);
}


        .file   "z.c"
        .text

;; Function f (f, funcdef_no=0, decl_uid=1932, cgraph_uid=1, symbol_order=2)

f (void * d)
{
  int _3;

  <bb 2> [local count: 1073741824]:
  _3 = __builtin_memcmp (d_2(D), &a, 8); [tail call]
  return _3;

}


        .p2align 4
        .globl  f
        .type   f, @function
f:
.LFB0:
        .cfi_startproc
        movl    $8, %edx
        movl    $a, %esi
        jmp     memcmp
        .cfi_endproc
.LFE0:
        .size   f, .-f

;; Function g (g, funcdef_no=1, decl_uid=1935, cgraph_uid=2, symbol_order=3)

g (void * d)
{
  int _3;

  <bb 2> [local count: 1073741824]:
  _3 = __builtin_memcmp (d_2(D), &b, 8); [tail call]
  return _3;

}


        .p2align 4
        .globl  g
        .type   g, @function
g:
.LFB1:
        .cfi_startproc
        movzbl  (%rdi), %eax
        ret
        .cfi_endproc
.LFE1:
        .size   g, .-g
        .globl  b
        .section        .rodata
        .align 8
        .type   b, @object
        .size   b, 8
b:
        .string ""
        .ascii  "\001\002\003\004\005\006\007"
        .globl  a
        .align 8
        .type   a, @object
        .size   a, 8
a:
        .ascii  "\001\002\003\004\005\006\007\b"
        .ident  "GCC: (GNU) 10.1.1 20200527"
        .section        .note.GNU-stack,"",@progbits

Reply via email to