[Bug middle-end/34300] New: gcc 4.2.2 miscompiles code that uses global register variables

bruno at clisp dot org Fri, 30 Nov 2007 02:45:21 -0800

$ gcc -v
Using built-in specs.
Target: i686-pc-linux-gnu
Configured with: ../gcc-4.2.2/configure --prefix=/packages/gnu-inst-gcc/4.2.2
--enable-shared --enable-version-specific-runtime-libs --enable-nls
--enable-threads=posix --enable-__cxa_atexit --with-as=/packages/gnu/bin/as
--with-ld=/packages/gnu/bin/ld --with-gmp=/packages/gnu-inst-gcc/4.2.2
--with-mpfr=/packages/gnu-inst-gcc/4.2.2
--with-ecj-jar=/gfs/ibook/Volumes/ExtData/source/gnu/gcc/sourceware.org-ecj/ecj-latest.jar
Thread model: posix
gcc version 4.2.2
$ gcc bug.c
$ ./a.out ; echo $?
0
$ gcc -O bug.c
$ ./a.out ; echo $?
Aborted
134


================================= bug.c ====================================
typedef unsigned char UBYTE;
typedef long SLONG;
typedef unsigned long ULONG;
typedef ULONG uint32;
typedef SLONG sint32;
typedef sint32 sintL;
typedef uint32 uintL;
typedef sint32 sintP;
typedef uint32 uintP;
typedef void * gcv_object_t;
typedef uintP oint;
typedef uint32 tint;
typedef uint32 aint;
typedef uint32 uintV;
typedef gcv_object_t object;
typedef struct {
  gcv_object_t cdr ;
  gcv_object_t car ;
} cons_;
typedef cons_ * Cons;
typedef struct {
  gcv_object_t GCself;
} symbol_;
typedef symbol_ * Symbol;
typedef struct {
  gcv_object_t GCself;
  gcv_object_t data[2] ;
} svector_;
typedef svector_ * Svector;
register gcv_object_t* STACK __asm__("%ebx");
extern object allocate_vector (uintL len);
struct symbol_tab_ {
  symbol_ S_nil;
} symbol_tab_data = { { (UBYTE*)&symbol_tab_data.S_nil+2 } };
extern void newinsert (object sym, uintL size);
extern void abort (void);
static gcv_object_t stackspace[100];
object rehash_symtab (object symtab)
{
  STACK[0] = symtab, STACK += 1;
  {
    uintL oldsize =
((uintV)(((oint)((((Svector)((oint)(symtab)-2))->data[0]))&((oint)0x20000000UL-1))>>5));
    {
      uintL newsize;
      {
        object size;
        STACK[0] = ((Svector)((oint)(symtab)-2))->data[1], STACK += 1;
        STACK[0] = (object)((UBYTE*)(&symbol_tab_data.S_nil)+2), STACK += 1;
        {
          uint32 prod = oldsize * 205UL;
          newsize = (prod < (1UL<<31) ? prod>>7 : 0xffffffUL);
        }
        newsize = (newsize - 1) | 1 ;
        size = ((gcv_object_t)((oint)(tint)0xC0000000 + ((oint)(aint)(newsize)
<< 5)));
        {
          object newtable = allocate_vector(newsize);
          STACK[0] = newtable, STACK += 1;
        }
        {
          gcv_object_t* offset = 0;
          uintL count;
          count = oldsize;
          do
            {
              object oldentry =
*(gcv_object_t*)(((UBYTE*)(&((Svector)((oint)(((STACK[-3])))-2))->data[0])+((aint)offset)));
              if (((oint)(oldentry) & 3) == 2)
                do
                  {
                    STACK[0] = ((Cons)((oint)(oldentry)-2))->cdr, STACK += 1;
                    ((Cons)((oint)(oldentry)-2))->cdr = STACK[-3];
                    STACK[-3] = oldentry;
                    newinsert(((Cons)((oint)(oldentry)-2))->car,newsize);
                    oldentry = (STACK -= 1, STACK[0]);
                  }
                while (((oint)(oldentry) & 3) == 2);
              offset++;
            }
          while(!(--count==0));
        }
        if (STACK != &stackspace[54])
          abort();
        {
          object newtable = (STACK -= 1, STACK[0]);
          STACK -= 2;
          symtab = (STACK -= 1, STACK[0]);
          ((Svector)((oint)(symtab)-2))->data[0] = size;
          ((Svector)((oint)(symtab)-2))->data[1] = newtable;
        }
        return symtab;
      }
    }
  }
}
object allocate_vector (uintL len)
{
  return 0;
}
void newinsert (object sym, uintL size)
{
  if (STACK != &stackspace[55])
    abort();
}
static cons_ cons2 = { (void*)0, (void*)'2' };
static cons_ cons1 = { (UBYTE*)&cons2+2, (void*)'1' };
static svector_ vector1 = { (UBYTE*)&vector1+2, { (UBYTE*)&cons1+2, 0 } };
static svector_ symtab1 = { (UBYTE*)&symtab1+2, { (void*)0x20000020,
(UBYTE*)&vector1+2 } };
int main ()
{
  STACK = &stackspace[50];
  rehash_symtab((UBYTE*)&symtab1+2);
  return 0;
}
============================================================================

This is a regression, because gcc 3.4.4, gcc 4.0.4, and gcc 4.1.2 all compile
this program fine, even with higher optimization levels (-O2).

What happens is that gcc 4.2.2 keeps a "cache" of the STACK = %ebx variable
in another register, %edi, in the inner loop, around the function call of
newinsert(). Two things are wrong here:
1) It is wrong to assume that newinsert() will leave STACK unmodified.
   newinsert() could also always decrement STACK by 2, for example. You can
   see that the code is unchanged if the newinsert() function is put into a
   different compilation unit.
2) The value cached in %edi and used for this statement
     oldentry = (STACK -= 1, STACK[0]);
   is the value of STACK at the beginning of the loop MINUS 1.
   If assuming that newinsert() does not change the STACK, the value cached
   in %edi should be the value of STACK at the beginning of the loop
   (since there is one increment of STACK before the newinsert call and
   one decrement of STACK after it.

Here's an annotated listing of the rehash_symtab function produced:
========================== gcc -O -S bug.c ================================
...
rehash_symtab:
        pushl   %ebp
        movl    %esp, %ebp
        pushl   %edi
        pushl   %esi
        subl    $16, %esp
        movl    8(%ebp), %eax                   ; %eax := symtab
        movl    %eax, (%ebx)                    ; STACK[0] = symtab
        leal    4(%ebx), %ecx                   ; STACK += 1
        subl    $2, %eax
        movl    4(%eax), %edx
        andl    $536870911, %edx
        movl    %edx, %edi
        shrl    $5, %edi                        ; %edi := oldsize
        movl    8(%eax), %eax                   ; Get
((Svector)((oint)(symtab)-2))->data[1]
        movl    %eax, (%ecx)                    ; STACK[0] = ...
        addl    $4, %ecx                        ; STACK += 1
        movl    $symbol_tab_data+2, (%ecx)      ; STACK[0] =
(object)((UBYTE*)(&symbol_tab_data.S_nil)+2)
        leal    4(%ecx), %esi                   ; STACK += 1
        movl    %esi, %ebx
        imull   $205, %edi, %edx                ; prod = oldsize * 205UL
        movl    $16777215, %eax
        testl   %edx, %edx
        js      .L10
        movl    %edx, %eax
        shrl    $7, %eax
.L10:                                           ; newsize in %eax
        subl    $1, %eax
        orl     $1, %eax                        ; newsize = (newsize - 1) | 1
        movl    %eax, -12(%ebp)
        movl    %eax, (%esp)
        call    allocate_vector                 ; newtable =
allocate_vector(newsize)
        movl    %eax, (%esi)                    ; STACK[0] = newtable
        addl    $4, %ebx                        ; STACK += 1
        movl    %edi, %esi
        movl    $0, -16(%ebp)                   ; offset = 0
.L11:
        movl    -12(%ebx), %eax
        movl    -16(%ebp), %ecx
        movl    2(%ecx,%eax), %edx              ; %edx := oldentry
        movl    %edx, %eax
        andl    $3, %eax
        cmpl    $2, %eax
        jne     .L12                            ; (((oint)(oldentry) & 3) == 2)
        leal    -4(%ebx), %edi                  ; HERE! %edi := STACK - 1
.L19:                                           ; BEGINNING OF INNER LOOP
        movl    -2(%edx), %eax
        movl    %eax, (%ebx)                    ; STACK[0] =
((Cons)((oint)(oldentry)-2))->cdr
        leal    4(%ebx), %eax
        movl    %eax, %ebx                      ; STACK += 1
        movl    -12(%eax), %eax
        movl    %eax, -2(%edx)                  ;
((Cons)((oint)(oldentry)-2))->cdr = STACK[-3]
        movl    %edx, -12(%ebx)                 ; STACK[-3] = oldentry
        movl    -12(%ebp), %eax                 ; newsize
        movl    %eax, 4(%esp)
        movl    2(%edx), %eax
        movl    %eax, (%esp)
        call    newinsert                       ;
newinsert(((Cons)((oint)(oldentry)-2))->car,newsize)
        movl    %edi, %ebx                      ; HERE! BUG! WRONG VALUE FOR
STACK!
        movl    (%edi), %edx                    ; oldentry = STACK[0]
        movl    %edx, %eax
        andl    $3, %eax
        cmpl    $2, %eax
        je      .L19                            ; (((oint)(oldentry) & 3) == 2)
.L12:                                           ; END OF INNER LOOP
        subl    $1, %esi
        je      .L14                            ; while(!(--count==0))
        addl    $4, -16(%ebp)                   ; offset++
        jmp     .L11
.L14:
        cmpl    $stackspace+216, %ebx           ; (STACK != &stackspace[54])
        .p2align 4,,2
        je      .L16
        call    abort
.L16:
        movl    stackspace+212, %esi
        movl    $stackspace+200, %ebx
        movl    stackspace+200, %eax
        leal    -2(%eax), %ecx
        movl    -12(%ebp), %edx
        sall    $5, %edx
        subl    $1073741824, %edx               ; %edx := size
        movl    %edx, 4(%ecx)
        movl    %esi, 8(%ecx)
        addl    $16, %esp
        popl    %esi
        popl    %edi
        popl    %ebp
        ret
...
============================================================================


-- 
           Summary: gcc 4.2.2 miscompiles code that uses global register
                    variables
           Product: gcc
           Version: 4.2.2
            Status: UNCONFIRMED
          Severity: major
          Priority: P3
         Component: middle-end
        AssignedTo: unassigned at gcc dot gnu dot org
        ReportedBy: bruno at clisp dot org
 GCC build triplet: i686-pc-linux-gnu
  GCC host triplet: i686-pc-linux-gnu
GCC target triplet: i686-pc-linux-gnu


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=34300

[Bug middle-end/34300] New: gcc 4.2.2 miscompiles code that uses global register variables

Reply via email to