https://gcc.gnu.org/bugzilla/show_bug.cgi?id=117839

            Bug ID: 117839
           Summary: Redundant vector XOR instructions
           Product: gcc
           Version: 15.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: target
          Assignee: unassigned at gcc dot gnu.org
          Reporter: hjl.tools at gmail dot com
  Target Milestone: ---
            Target: x86-64

[hjl@gnu-tgl-3 zero-1]$ cat y.c
#include <stddef.h>
#include <string.h>

/* Reproducer: store zeros into the buffer at MEM, whose length is
   CLEARSIZE bytes, and return the number of whole size_t words in
   CLEARSIZE.

   The word count is a size_t but the function returns float, so the
   return statement performs an integer-to-float conversion.

   Every store in this function writes the constant zero.

   NOTE(review): this looks like a reduced test case rather than a
   complete clearing routine; see the per-branch notes below.  */
float
clear_memory (void *mem, size_t clearsize)
{
  /* Unroll clear memory size up to 9 * size_t bytes.  We know
     that contents have an odd number of size_t-sized words;
     minimally 3 words.  */
  size_t *d = (size_t *) mem;
  /* Number of whole size_t words; any remainder bytes are dropped by
     the integer division.  */
  size_t nclears = clearsize / sizeof (size_t);

  /* Large buffers are cleared with memset.  There is no return here,
     so control falls through and the inline stores below are executed
     as well.  */
  if (nclears > 17)
    memset (mem, 0, clearsize);

  /* Use overlapping stores with 2 branches, instead of up to 6.  */
  /* Words 0..2 are stored unconditionally.  */
  *(d + 0) = 0;
  *(d + 1) = 0;
  *(d + 2) = 0;
  if (nclears > 9)
    {
      /* Stores words 5..8, then words nclears-8 .. nclears-5.
         NOTE(review): words 3 and 4 and the final four words
         (nclears-4 .. nclears-1) are not written by the inline stores
         in this branch -- confirm this is intended for the
         reproducer.  */
      *(d + 5) = 0;
      *(d + 5 + 1) = 0;
      *(d + 5 + 2) = 0;
      *(d + 5 + 3) = 0;
      *(d + nclears - 8) = 0;
      *(d + nclears - 8 + 1) = 0;
      *(d + nclears - 8 + 2) = 0;
      *(d + nclears - 8 + 3) = 0;
    }
  else
    {
      /* Stores words 1..4 (words 1 and 2 were already stored above),
         then the last four words, nclears-4 .. nclears-1.
         NOTE(review): when nclears is 3, d + nclears - 4 is d - 1,
         and words 3 and 4 lie past a 3-word buffer; presumably the
         reproducer is only compiled, not run -- confirm.  */
      *(d + 1) = 0;
      *(d + 2) = 0;
      *(d + 3) = 0;
      *(d + 4) = 0;
      *(d + nclears - 4) = 0;
      *(d + nclears - 4 + 1) = 0;
      *(d + nclears - 4 + 2) = 0;
      *(d + nclears - 4 + 3) = 0;
    }

  /* Implicit size_t -> float conversion of the word count.  */
  return nclears;
}
[hjl@gnu-tgl-3 zero-1]$ make y.s
/export/build/gnu/tools-build/gcc-gitlab-debug/build-x86_64-linux/gcc/xgcc
-B/export/build/gnu/tools-build/gcc-gitlab-debug/build-x86_64-linux/gcc/ -O2 
-march=x86-64-v3 -S y.c
[hjl@gnu-tgl-3 zero-1]$ grep xor y.s
        vpxor   %xmm0, %xmm0, %xmm0
        vxorps  %xmm0, %xmm0, %xmm0
        vpxor   %xmm0, %xmm0, %xmm0
        vxorps  %xmm0, %xmm0, %xmm0
        xorl    %esi, %esi
        vpxor   %xmm0, %xmm0, %xmm0
        vpxor   %xmm0, %xmm0, %xmm0
        vxorps  %xmm0, %xmm0, %xmm0
[hjl@gnu-tgl-3 zero-1]$ 

There are 7 vector XOR instructions (vpxor/vxorps), each zeroing %xmm0, but one is sufficient.

Reply via email to