More results:

I get the same strange behaviour when I compile with -march=pentium2. I
could narrow it down to lib/GL/mesa/src/drv/radeon/radeon_state.c. Then
I split radeon_state.c and put only one non-static function at a time in
a new radeon_state2.c along with the necessary static functions. Only
radeon_state2 was compiled with -march=athlon.

I could reproduce the error with only radeonUpdateScissor and
radeonUpdateViewportOffset in radeon_state2.c. On the other hand I don't
get the error if I compile everything with -march=athlon except
radeon_state2.c which means that I've neatly isolated the problem :)

I had gcc generate assembler output for radeon_state2.c with
-mcpu=athlon and -march=athlon. A diff of the two assembler files and
radeon_state2.c (for comparing line numbers) are attached. The version
compiled with -march=athlon uses %mm0, the other one doesn't. My guess
is that some other part of the radeon driver or Mesa makes assumptions
about the MMX state which are not true when compiling with
-march=athlon.

I tried disabling MesaUse3DNow and MesaUseMMX in host.def, but that
didn't help.

Regards,
   Felix

On Sat, 12 Oct 2002 18:57:45 +0200
Felix Kühling <[EMAIL PROTECTED]> wrote:

> Hello,
> 
> I've looked into my gcc-3.2 problem again and found out that gcc-3.2
> with -march=athlon produces the problem I described in every detail in a
> previous mail (wirebox example).
> 
> What made this so tedious to find (the problem was appearing and
> disappearing arbitrarily) is an inconsistency in the Makefiles. When I
> run a global make from xc/xc it uses the optimization options I
> specified in host.def (-O2 -march=athlon). When I run make locally in a
> subdirectory (I tried lib/GL/mesa/src/drv[/radeon]) it uses only -O2.
> 
> I looked into the local Makefile and found that the definition of CFLAGS
> appears twice in the Makefile. The first one specifies -O2 -march=athlon
> in CDEBUGFLAGS, the second one specifies only -O2. So in effect a local
> make uses only -O2. When doing a global make, CDEBUGFLAGS is specified on
> the command line of make for all subdirectories in xc/xc/xmakefile:
> 
>         for i in $(SUBDIRS) ;\
>         do \
>         echo "making" all "in $(CURRENT_DIR)/$$i..."; \
>         $(MAKE) -C $$i $(MFLAGS) $(PARALLELMFLAGS) CDEBUGFLAGS="$(CDEBUGFLAGS)" all; \
>         done
> 
> This overrides the variable definitions of CDEBUGFLAGS in all
> subdirectories.
> 
> Now we still have to find the exact cause of the problem with
> -march=athlon. First of all, can anyone reproduce it?


               __\|/__    ___     ___     ___
__Tschüß_______\_6 6_/___/__ \___/__ \___/___\___You can do anything,___
_____Felix_______\�/\ \_____\ \_____\ \______U___just not everything____
  [EMAIL PROTECTED]    >o<__/   \___/   \___/        at the same time!

--- radeon_state2_noathlon.s    2002-10-13 01:11:30.000000000 +0200
+++ radeon_state2_athlon.s      2002-10-13 01:04:03.000000000 +0200
@@ -183,17 +183,17 @@
        .loc 1 83 0
        movl    %eax, -16(%ebp)
        .loc 1 85 0
-       movl    12760(%ebx), %edi
+       movl    12760(%ebx), %esi
        .loc 1 86 0
-       fildl   28(%edi)
+       fildl   28(%esi)
        fstps   -24(%ebp)
        .loc 1 87 0
-       fildl   32(%edi)
+       fildl   32(%esi)
        .loc 1 86 0
        movl    -24(%ebp), %ecx
        .loc 1 87 0
        fsts    -24(%ebp)
-       fildl   40(%edi)
+       fildl   40(%esi)
        faddp   %st, %st(1)
        fsts    -24(%ebp)
        .loc 1 88 0
@@ -212,25 +212,23 @@
        .loc 1 93 0
        movl    216(%ebx), %edx
        .loc 1 91 0
-       movl    -24(%ebp), %esi
+       movd    -24(%ebp), %mm0
        .loc 1 93 0
        fildl   8(%edx)
        movl    %ecx, -24(%ebp)
        flds    -24(%ebp)
        fxch    %st(1)
-       fucompp
-       fnstsw  %ax
-       andb    $69, %ah
-       xorb    $64, %ah
+       fucomip %st(1), %st
+       fstp    %st(0)
        jne     .L14
+       jp      .L14
        fildl   16(%edx)
-       movl    %esi, -24(%ebp)
+       movd    %mm0, -24(%ebp)
        flds    -24(%ebp)
        fxch    %st(1)
-       fucompp
-       fnstsw  %ax
-       andb    $69, %ah
-       cmpb    $64, %ah
+       fucomip %st(1), %st
+       fstp    %st(0)
+       jp      .L14
        je      .L13
 .L14:
        .loc 1 105 0
@@ -240,7 +238,7 @@
 .LBE6:
        movl    %ecx, 8(%edx)
        .loc 1 100 0
-       movl    %esi, 16(%edx)
+       movd    %mm0, 16(%edx)
        .loc 1 111 0
 .LBB7:
        movl    $31, %edx
@@ -248,18 +246,18 @@
        .loc 1 105 0
        movl    %eax, -20(%ebp)
        .loc 1 107 0
-       movl    4(%eax), %esi
+       movl    4(%eax), %edi
        .loc 1 111 0
-       movl    28(%edi), %eax
+       movl    28(%esi), %eax
        decl    %eax
        .loc 1 107 0
-       andl    $-7968, %esi
+       andl    $-7968, %edi
        .loc 1 111 0
        andl    $31, %eax
        subl    %eax, %ecx
        .loc 1 112 0
-       movl    40(%edi), %eax
-       addl    32(%edi), %eax
+       movl    40(%esi), %eax
+       addl    32(%esi), %eax
        decl    %eax
        andl    $31, %eax
        subl    %eax, %edx
@@ -269,14 +267,14 @@
        .loc 1 118 0
        movl    -20(%ebp), %edx
        .loc 1 115 0
-       orl     %ecx, %esi
+       orl     %ecx, %edi
        .loc 1 118 0
-       cmpl    %esi, 4(%edx)
+       cmpl    %edi, 4(%edx)
        je      .L13
        .loc 1 119 0
        movl    2408(%ebx), %eax
        testl   %eax, %eax
-       jne     .L22
+       jne     .L23
 .L17:
        movl    260(%ebx), %edx
        movl    264(%ebx), %eax
@@ -292,7 +290,7 @@
        movl    %eax, 4(%edx)
        .loc 1 120 0
        movl    -20(%ebp), %edx
-       movl    %esi, 4(%edx)
+       movl    %edi, 4(%edx)
 .L13:
        .loc 1 125 0
 .LBE7:
@@ -305,7 +303,7 @@
        leave
        jmp     radeonUpdateScissor
        .p2align 6,,7
-.L22:
+.L23:
 .LBB8:
        movl    %ebx, (%esp)
        call    *2408(%ebx)
@@ -62303,7 +62301,7 @@
        .byte   0x55
        .long   0x134f2
        .byte   0x1
-       .byte   0x57
+       .byte   0x56
        .uleb128 0x37
        .long   .LC241
        .byte   0x1
@@ -62353,7 +62351,7 @@
        .byte   0x69
        .long   0xf82a
        .byte   0x1
-       .byte   0x56
+       .byte   0x57
        .byte   0x0
        .byte   0x0
        .uleb128 0x3c
/* $XFree86$ */
/*
 * Copyright 2000, 2001 VA Linux Systems Inc., Fremont, California.
 *
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Gareth Hughes <[EMAIL PROTECTED]>
 *    Keith Whitwell <[EMAIL PROTECTED]>
 */

#include "radeon_context.h"
#include "radeon_ioctl.h"
#include "radeon_state.h"
#include "radeon_tcl.h"
#include "radeon_tex.h"
#include "radeon_swtcl.h"
#include "radeon_vtxfmt.h"

#include "mem.h"
#include "mmath.h"
#include "enums.h"
#include "colormac.h"
#include "light.h"
#include "api_arrayelt.h"

#include "swrast/swrast.h"
#include "array_cache/acache.h"
#include "tnl/tnl.h"
#include "tnl/t_pipeline.h"
#include "swrast_setup/swrast_setup.h"


/* Small integer identifiers, numbered consecutively from 0.
 * NOTE(review): presumably indices selecting the model/projection,
 * model, inverse-transpose and per-unit texture matrices -- nothing in
 * this file uses them, so confirm against the code that does.
 */
#define MODEL_PROJ 0
#define MODEL      1
#define MODEL_IT   2
#define TEXMAT_0   3
#define TEXMAT_1   4
#define TEXMAT_2   5


/*
 * Recompute rmesa->state.scissor.rect from the GL scissor box and the
 * position/height of the current drawable, then let
 * radeonRecalcScissorRects() refresh whatever depends on it.
 *
 * Does nothing when the context has no drawable bound.
 */
static void radeonUpdateScissor( GLcontext *ctx )
{
   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
   __DRIdrawablePrivate *drawable = rmesa->dri.drawable;
   int left, top, right, bottom;

   if ( !drawable ) {
      return;
   }

   /* Scissor box relative to the drawable; the vertical axis is flipped
    * against the drawable height.  right/bottom are the last covered
    * column/row (inclusive).
    */
   left   = ctx->Scissor.X;
   top    = drawable->h - ctx->Scissor.Y - ctx->Scissor.Height;
   right  = ctx->Scissor.X + ctx->Scissor.Width - 1;
   bottom = drawable->h - ctx->Scissor.Y - 1;

   /* Shift by the drawable origin; x2/y2 are stored one past the
    * inclusive right/bottom edge.
    */
   rmesa->state.scissor.rect.x1 = left + drawable->x;
   rmesa->state.scissor.rect.y1 = top + drawable->y;
   rmesa->state.scissor.rect.x2 = right + drawable->x + 1;
   rmesa->state.scissor.rect.y2 = bottom + drawable->y + 1;

   radeonRecalcScissorRects( rmesa );
}

/*
 * Recompute the viewport x/y offsets from the drawable position and the
 * translation terms of the current window map, store them in the VPT
 * command words, and refresh the polygon stipple x/y screen offset to
 * match.  Always finishes by calling radeonUpdateScissor().
 *
 * The VPT command words hold the raw bit pattern of each float offset.
 * Both the "did it change?" test and the store therefore work on bit
 * patterns: comparing the integer command word against the float value
 * directly would convert the bit pattern numerically and report a change
 * for practically every non-zero offset.  A union is used for the
 * float <-> bits conversion instead of a pointer cast, which would break
 * the strict aliasing rules.
 *
 * NOTE(review): rmesa->dri.drawable is dereferenced unconditionally here,
 * whereas radeonUpdateScissor() guards against it being NULL -- confirm
 * callers guarantee a bound drawable.
 */
void radeonUpdateViewportOffset( GLcontext *ctx )
{
   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
   __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
   GLfloat xoffset = (GLfloat)dPriv->x;
   GLfloat yoffset = (GLfloat)dPriv->y + dPriv->h;
   const GLfloat *v = ctx->Viewport._WindowMap.m;
   union { GLfloat f; GLuint ui; } tx, ty;

   tx.f = v[MAT_TX] + xoffset;
   ty.f = (- v[MAT_TY]) + yoffset;

   if ( rmesa->hw.vpt.cmd[VPT_SE_VPORT_XOFFSET] != tx.ui ||
        rmesa->hw.vpt.cmd[VPT_SE_VPORT_YOFFSET] != ty.ui )
   {
      /* Note: this should also modify whatever data the context reset
       * code uses...
       */
      rmesa->hw.vpt.cmd[VPT_SE_VPORT_XOFFSET] = tx.ui;
      rmesa->hw.vpt.cmd[VPT_SE_VPORT_YOFFSET] = ty.ui;

      /* update polygon stipple x/y screen offset */
      {
         GLuint stx, sty;
         GLuint m = rmesa->hw.msc.cmd[MSC_RE_MISC];

         m &= ~(RADEON_STIPPLE_X_OFFSET_MASK |
                RADEON_STIPPLE_Y_OFFSET_MASK);

         /* add magic offsets, then invert */
         stx = 31 - ((rmesa->dri.drawable->x - 1) & RADEON_STIPPLE_COORD_MASK);
         sty = 31 - ((rmesa->dri.drawable->y + rmesa->dri.drawable->h - 1)
                     & RADEON_STIPPLE_COORD_MASK);

         m |= ((stx << RADEON_STIPPLE_X_OFFSET_SHIFT) |
               (sty << RADEON_STIPPLE_Y_OFFSET_SHIFT));

         /* Only flag the MSC state as changed when the word differs. */
         if ( rmesa->hw.msc.cmd[MSC_RE_MISC] != m ) {
            RADEON_STATECHANGE( rmesa, msc );
            rmesa->hw.msc.cmd[MSC_RE_MISC] = m;
         }
      }
   }

   radeonUpdateScissor( ctx );
}

Reply via email to