#include <stdint.h>
#include <stdio.h>

/*
 * Return the 4-bit nibble stored at nibble address `addr`.
 *
 * addr >> 1 is used as the address of the byte to read, and the low bit of
 * addr selects the nibble inside that byte (0 = low nibble, 1 = high nibble).
 * The result is always in the range 0..15.
 *
 * x86 only: the address is split with inline assembly so that the bit shifted
 * out by SHR is picked up from the carry flag with SETC.
 */
uint8_t nibble(size_t addr) {
  size_t byte_addr;
  uint8_t bit;
  /*
   * [addr] is register-only ("r"): "shr $1, <mem>" carries no operand-size
   * suffix, so a memory operand would be ambiguous to the assembler; with a
   * register the size follows from the register name.  [bit] uses "q" so
   * that a register choice always has a byte-sized name for SETC (SETC to
   * memory is unambiguous, so "m" stays allowed there).
   */
  __asm__("shr $1, %[addr]\n\t"
          "setc %[bit]"
          : [addr] "=r" (byte_addr), [bit] "=qm" (bit)
          : "[addr]" (addr)
          : "cc");
  uint8_t byte = ((const uint8_t *) byte_addr)[0];
  /* bit is 0 or 1, so four shifts by bit move the byte right by 0 or 4. */
  byte >>= bit;
  byte >>= bit;
  byte >>= bit;
  byte >>= bit;
  return byte & 15;
}

/* Program entry point: does no work and returns 0. */
int main(void) {
  return 0;
}


Generated code for nibble() at -O3 (Intel syntax):

  400480:       48 d1 ef                shr    rdi,1
  400483:       0f 92 c1                setb   cl
  400486:       0f b6 07                movzx  eax,BYTE PTR [rdi]
  400489:       0f b6 c9                movzx  ecx,cl
  40048c:       d3 f8                   sar    eax,cl
  40048e:       0f b6 c0                movzx  eax,al
  400491:       d3 f8                   sar    eax,cl
  400493:       0f b6 c0                movzx  eax,al
  400496:       d3 f8                   sar    eax,cl
  400498:       0f b6 c0                movzx  eax,al
  40049b:       d3 f8                   sar    eax,cl
  40049d:       83 e0 0f                and    eax,0xf
  4004a0:       c3                      ret


Suggested code:

  400480:       48 d1 ef                shr    rdi,1
  400483:       0f 92 c1                setb   cl
  400486:       0f b6 07                movzx  eax,BYTE PTR [rdi]
  4004xx:       d3 f8                   sar    eax,cl
  4004xx:       d3 f8                   sar    eax,cl
  4004xx:       d3 f8                   sar    eax,cl
  4004xx:       d3 f8                   sar    eax,cl
  4004xx:       83 e0 0f                and    eax,0xf
  4004xx:       c3                      ret

[Alternatively multiply CL by 4 and perform one right shift by CL]

I do not see any partial register stall in the suggested code that the additional
movzx instructions would be needed to avoid.


-- 
           Summary: multiple movzx instructions generated when no partial
                    register stall is evident
           Product: gcc
           Version: 4.4.1
            Status: UNCONFIRMED
          Severity: trivial
          Priority: P3
         Component: c
        AssignedTo: unassigned at gcc dot gnu dot org
        ReportedBy: adam at consulting dot net dot nz
 GCC build triplet: core2
  GCC host triplet: linux
GCC target triplet: x86_64


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=40170

Reply via email to