Dave Korn wrote:

  Yes, I think so; aggregation is the right word for it.  Or maybe
scalarization.  If you wrap all these chars in a struct, can SRA handle it?


I tried

#######################################################################
/* file testmanychar.c */
/* g is defined in another translation unit; it takes an int tag and three
   char pointers.  Its behaviour is not visible here. */
extern void g (int, char *, char *, char *);

/* Test case: eight char flags wrapped in a single local struct, zero
   initialized, whose member addresses are passed to g in four overlapping
   triples (x0..x2, x2..x4, x4..x6, x6,x7,x0). */
void
f (void)
{
  /* Only seven initializers are written for eight members; the remaining
     member (x7) is implicitly zero-initialized. */
  struct {
    char x0, x1, x2, x3, x4, x5, x6, x7;
  } s = {0,0,0,0,0,0,0};
  /* assuming s.x0 is word aligned on an x86_64, and the variables are
     bytes in memory, we could clear all the variables in one machine
     instruction */
  g (10, &s.x0, &s.x1, &s.x2);
  g (20, &s.x2, &s.x3, &s.x4);
  g (30, &s.x4, &s.x5, &s.x6);
  g (40, &s.x6, &s.x7, &s.x0);
}
#######################################################################

I compiled it with
 gcc-trunk -S -std=gnu99 -O3 -fverbose-asm \
        -march=core2 -mtune=core2 testmanychar.c

and I got:
#######################################################################
# GCC -O3 output for f (GAS, AT&T syntax: source operand first).
# Each call to g receives its four arguments in %edi, %rsi, %rdx, %rcx.
# The struct s occupies the 8 bytes at (%rsp), so N(%rsp) is &s.xN.
# Values kept across calls: %rbp = &s.x2 (later &s.x6), %r12 = &s.x4.
.globl f
        .type   f, @function
f:
.LFB0:
        .cfi_startproc
        # Save %rbx, %rbp, %r12 below the incoming %rsp; after the subq
        # further down these slots are 16(%rsp), 24(%rsp) and 32(%rsp),
        # which is where the epilogue reloads them from.
        # NOTE(review): %rbx is saved and restored, but no other
        # instruction in this listing uses it.
        movq    %rbx, -24(%rsp) #,
        movq    %rbp, -16(%rsp) #,
        movq    %r12, -8(%rsp)  #,
        # arg 1 of the first call: the constant 10
        movl    $10, %edi       #,
        # Allocate 40 bytes: s at 0..7(%rsp), the three saved registers
        # at 16..39(%rsp).
        subq    $40, %rsp       #,
        # CFA = %rsp + 48 (40 bytes of frame + 8 bytes of return address)
        .cfi_def_cfa_offset 48
        # %rbp = &s.x2, reused as arg 4 here and as arg 2 of the second call
        leaq    2(%rsp), %rbp   #, tmp59
        # DWARF registers 12, 6, 3 (%r12, %rbp, %rbx) are saved at
        # CFA-16, CFA-24, CFA-32, matching the three stores above.
        .cfi_offset 12, -16
        .cfi_offset 6, -24
        .cfi_offset 3, -32
        # %r12 = &s.x4, reused by the second and third calls
        leaq    4(%rsp), %r12   #, tmp64
        # g (10, &s.x0, &s.x1, &s.x2)
        leaq    1(%rsp), %rdx   #, tmp61
        movq    %rbp, %rcx      # tmp59,
        movq    %rsp, %rsi      #,
        # One 8-byte store zeroes all eight char members of s at once.
        movq    $0, (%rsp)      #, s
        call    g       #
        # g (20, &s.x2, &s.x3, &s.x4)
        leaq    3(%rsp), %rdx   #, tmp66
        movq    %r12, %rcx      # tmp64,
        movq    %rbp, %rsi      # tmp59,
        movl    $20, %edi       #,
        # &s.x2 has been copied to %rsi, so %rbp is recycled to hold &s.x6
        leaq    6(%rsp), %rbp   #, tmp70
        call    g       #
        # g (30, &s.x4, &s.x5, &s.x6)
        leaq    5(%rsp), %rdx   #, tmp72
        movq    %rbp, %rcx      # tmp70,
        movq    %r12, %rsi      # tmp64,
        movl    $30, %edi       #,
        call    g       #
        # g (40, &s.x6, &s.x7, &s.x0)
        leaq    7(%rsp), %rdx   #, tmp77
        movq    %rsp, %rcx      #,
        movq    %rbp, %rsi      # tmp70,
        movl    $40, %edi       #,
        call    g       #
        # Epilogue: reload the saved registers and release the frame.
        movq    16(%rsp), %rbx  #,
        movq    24(%rsp), %rbp  #,
        movq    32(%rsp), %r12  #,
        addq    $40, %rsp       #,
        # Only the return address remains between %rsp and the CFA.
        .cfi_def_cfa_offset 8
        ret
        .cfi_endproc
.LFE0:
        .size   f, .-f
.ident "GCC: (GNU) 4.5.0 20100309 (experimental) [trunk revision 157303]"

#######################################################################

So it seems that a structure can indeed be cleared efficiently (here with a single 8-byte store). However, using a structure probably also has some shortcomings: I would guess that it is much harder for GCC to keep my flags in registers only.

Regards.

--
Basile STARYNKEVITCH         http://starynkevitch.net/Basile/
email: basile<at>starynkevitch<dot>net mobile: +33 6 8501 2359
8, rue de la Faiencerie, 92340 Bourg La Reine, France
*** opinions {are only mines, sont seulement les miennes} ***

Reply via email to