Ian Romanick wrote:

The one caveat with this patch is the x86 & SSE codegen is disabled for all TexCoord and MultiTexCoord commands. If you look at the changes to r200_vtxfmt_c.c, you'll see that I had to make some changes to the way those routines work.

The previous patch is committed. The attached patch adds x86 & SSE codegen back. I've changed the way the codegen works just slightly.


Each codegen stub consists of a bit of assembly code that needs to be relocated / fixed-up at run-time. Prepended to the assembly code is a small preamble that describes how to do this. The preamble contains the size of the assembly stub and an array of "fix-ups" that need to be done. The stub code follows immediately after the array of fix-ups.

At run-time, the function r200_do_codegen is called to create the executable stub. It is passed a pointer to the stub's preamble and an array of fix-up values. Each entry in the stub's fix-up array specifies a size, an offset in the stub, and an element index to use for the fix-up. This is similar to how a reloc table works in an object file.

There are two obvious advantages. If a stub is modified, it is likely that only one file (the file containing the stub) needs to be updated.
Code size (in the form of FIXUP macros) is cut way down.


There are a couple of advantages to this that aren't fully realized in this code. This is a *lot* more cross-platform. The only difference between r200_makeX86TexCoord2f and r200_makeSSETexCoord2f (and the non-existent r200_makePowerPCTexCoord2f) is a single pointer passed to r200_do_codegen. This should make it possible to cut down on a lot of redundant code. Additionally, since the codegen stubs contain all the information needed to do the fix-ups, it should be possible to share common assembly stubs in multiple places (i.e., _x86_Vertex3f in r200, radeon, and t_vertex).

One disadvantage is if the codegen_stub structure is changed. If that structure is changed, all of the assembly files will also have to change. However, there won't be any compiler warnings for any that are "missed." We'll just get mysterious codegen related bugs. :(

Another disadvantage is that this code seems to be more prone to cut-and-paste type errors.

If this new method is acceptable to everyone, I'll modify the rest of the codegen stubs in the R200 driver to use it. I'd really like to put some form of r200_do_codegen in a shared location so that other places that do codegen can re-use it.

? src/mesa/drivers/dri/r200/depend
Index: src/mesa/drivers/dri/r200/r200_context.h
===================================================================
RCS file: /cvs/mesa/Mesa/src/mesa/drivers/dri/r200/r200_context.h,v
retrieving revision 1.15
diff -w -u -d -r1.15 r200_context.h
--- a/src/mesa/drivers/dri/r200/r200_context.h  5 May 2004 20:16:17 -0000       1.15
+++ b/src/mesa/drivers/dri/r200/r200_context.h  5 May 2004 22:19:34 -0000
@@ -788,6 +788,7 @@
    r200_color_t *specptr;
    GLfloat *texcoordptr[2];
 
+   GLint texcoordsize[2];       /**< Number of elements in each tex coord. */
 
    GLenum *prim;               /* &ctx->Driver.CurrentExecPrimitive */
    GLuint primflags;
Index: src/mesa/drivers/dri/r200/r200_vtxfmt.c
===================================================================
RCS file: /cvs/mesa/Mesa/src/mesa/drivers/dri/r200/r200_vtxfmt.c,v
retrieving revision 1.9
diff -w -u -d -r1.9 r200_vtxfmt.c
--- a/src/mesa/drivers/dri/r200/r200_vtxfmt.c   5 May 2004 21:32:16 -0000       1.9
+++ b/src/mesa/drivers/dri/r200/r200_vtxfmt.c   5 May 2004 22:19:34 -0000
@@ -808,6 +808,8 @@
 
         rmesa->vb.vertex_size += count[i];
       }
+
+      rmesa->vb.texcoordsize[i] = count[i];
    }
 
    if (rmesa->vb.installed_vertex_format != rmesa->vb.vtxfmt_0) {
Index: src/mesa/drivers/dri/r200/r200_vtxfmt_sse.c
===================================================================
RCS file: /cvs/mesa/Mesa/src/mesa/drivers/dri/r200/r200_vtxfmt_sse.c,v
retrieving revision 1.5
diff -w -u -d -r1.5 r200_vtxfmt_sse.c
--- a/src/mesa/drivers/dri/r200/r200_vtxfmt_sse.c       5 May 2004 20:16:17 -0000       1.5
+++ b/src/mesa/drivers/dri/r200/r200_vtxfmt_sse.c       5 May 2004 22:19:34 -0000
@@ -45,44 +45,18 @@
 extern const char *FUNC;       \
 extern const char *FUNC##_end
 
-EXTERN( _sse_Attribute2fv );
-EXTERN( _sse_Attribute2f );
 EXTERN( _sse_Attribute3fv );
 EXTERN( _sse_Attribute3f );
-EXTERN( _sse_MultiTexCoord2fv );
-EXTERN( _sse_MultiTexCoord2f );
-EXTERN( _sse_MultiTexCoord2fv_2 );
-EXTERN( _sse_MultiTexCoord2f_2 );
 
-/* Build specialized versions of the immediate calls on the fly for
- * the current state.
- */
-
-static struct dynfn *r200_makeSSEAttribute2fv( struct dynfn * cache, const int * key,
-                                              const char * name, void * dest)
-{
-   struct dynfn *dfn = MALLOC_STRUCT( dynfn );
-
-   if (R200_DEBUG & DEBUG_CODEGEN)
-      fprintf(stderr, "%s 0x%08x\n", name, key[0] );
-
-   DFN ( _sse_Attribute2fv, (*cache) );
-   FIXUP(dfn->code, 10, 0x0, (int)dest);
-   return dfn;
-}
-
-static struct dynfn *r200_makeSSEAttribute2f( struct dynfn * cache, const int * key,
-                                             const char * name, void * dest )
-{
-   struct dynfn *dfn = MALLOC_STRUCT( dynfn );
-
-   if (R200_DEBUG & DEBUG_CODEGEN)
-      fprintf(stderr, "%s 0x%08x\n", name, key[0] );
+struct codegen_stub;
+extern struct dynfn * r200_do_codegen( const struct codegen_stub * stub,
+    struct dynfn * cache, const int *key, const intptr_t * data,
+    const char * func_name );
 
-   DFN ( _sse_Attribute2f, (*cache) );
-   FIXUP(dfn->code, 8, 0x0, (int)dest); 
-   return dfn;
-}
+extern struct codegen_stub _sse_TexCoord2fv_stub;
+extern struct codegen_stub _sse_TexCoord2f_stub;
+extern struct codegen_stub _sse_MultiTexCoord2fv_stub;
+extern struct codegen_stub _sse_MultiTexCoord2f_stub;
 
 static struct dynfn *r200_makeSSEAttribute3fv( struct dynfn * cache, const int * key,
                                               const char * name, void * dest)
@@ -154,59 +128,57 @@
    }
 }
 
-#if 0 /* Temporarily disabled as it is broken w/the new cubemap code. - idr */
 static struct dynfn *r200_makeSSETexCoord2fv( GLcontext *ctx, const int * key )
 {
    r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   intptr_t data[2];
 
-   return r200_makeSSEAttribute2fv( & rmesa->vb.dfn_cache.TexCoord2fv, key,
-                                   __FUNCTION__, rmesa->vb.texcoordptr[0] );
+   data[0] = (intptr_t) rmesa->vb.texcoordsize;
+   data[1] = (intptr_t) rmesa->vb.texcoordptr;
+
+   return r200_do_codegen( & _sse_TexCoord2fv_stub,
+                     & rmesa->vb.dfn_cache.TexCoord2fv,
+                     key, data, __func__ );
 }
 
 static struct dynfn *r200_makeSSETexCoord2f( GLcontext *ctx, const int * key )
 {
    r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   intptr_t data[2];
 
-   return r200_makeSSEAttribute2f( & rmesa->vb.dfn_cache.TexCoord2f, key,
-                                  __FUNCTION__, rmesa->vb.texcoordptr[0] );
+   data[0] = (intptr_t) rmesa->vb.texcoordsize;
+   data[1] = (intptr_t) rmesa->vb.texcoordptr;
+
+   return r200_do_codegen( & _sse_TexCoord2f_stub,
+                     & rmesa->vb.dfn_cache.TexCoord2f,
+                     key, data, __func__ );
 }
 
 static struct dynfn *r200_makeSSEMultiTexCoord2fv( GLcontext *ctx, const int * key )
 {
-   struct dynfn *dfn = MALLOC_STRUCT( dynfn );
    r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   intptr_t data[2];
 
-   if (R200_DEBUG & DEBUG_CODEGEN)
-      fprintf(stderr, "%s 0x%08x\n", __FUNCTION__, key[0] );
+   data[0] = (intptr_t) rmesa->vb.texcoordsize;
+   data[1] = (intptr_t) rmesa->vb.texcoordptr;
 
-   if (rmesa->vb.texcoordptr[1] == rmesa->vb.texcoordptr[0]+4) {
-      DFN ( _sse_MultiTexCoord2fv, rmesa->vb.dfn_cache.MultiTexCoord2fvARB );
-      FIXUP(dfn->code, 18, 0xdeadbeef, (int)rmesa->vb.texcoordptr[0]); 
-   } else {
-      DFN ( _sse_MultiTexCoord2fv_2, rmesa->vb.dfn_cache.MultiTexCoord2fvARB );
-      FIXUP(dfn->code, 14, 0x0, (int)rmesa->vb.texcoordptr);
-   }
-   return dfn;
+   return r200_do_codegen( & _sse_MultiTexCoord2fv_stub,
+                     & rmesa->vb.dfn_cache.MultiTexCoord2fvARB,
+                     key, data, __func__ );
 }
 
 static struct dynfn *r200_makeSSEMultiTexCoord2f( GLcontext *ctx, const int * key )
 {
-   struct dynfn *dfn = MALLOC_STRUCT( dynfn );
    r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   intptr_t data[2];
 
-   if (R200_DEBUG & DEBUG_CODEGEN)
-      fprintf(stderr, "%s 0x%08x\n", __FUNCTION__, key[0] );
+   data[0] = (intptr_t) rmesa->vb.texcoordsize;
+   data[1] = (intptr_t) rmesa->vb.texcoordptr;
 
-   if (rmesa->vb.texcoordptr[1] == rmesa->vb.texcoordptr[0]+4) {
-      DFN ( _sse_MultiTexCoord2f, rmesa->vb.dfn_cache.MultiTexCoord2fARB );
-      FIXUP(dfn->code, 16, 0xdeadbeef, (int)rmesa->vb.texcoordptr[0]); 
-   } else {
-      DFN ( _sse_MultiTexCoord2f_2, rmesa->vb.dfn_cache.MultiTexCoord2fARB );
-      FIXUP(dfn->code, 15, 0x0, (int)rmesa->vb.texcoordptr);
-   }
-   return dfn;
+   return r200_do_codegen( & _sse_MultiTexCoord2f_stub,
+                     & rmesa->vb.dfn_cache.MultiTexCoord2fARB,
+                     key, data, __func__ );
 }
-#endif
 
 void r200InitSSECodegen( struct dfn_generators *gen )
 {
@@ -215,12 +187,10 @@
       gen->Normal3f = (void *) r200_makeSSENormal3f;
       gen->Color3fv = (void *) r200_makeSSEColor3fv;
       gen->Color3f = (void *) r200_makeSSEColor3f;
-#if 0 /* Temporarily disabled as it is broken w/the new cubemap code. - idr */
       gen->TexCoord2fv = (void *) r200_makeSSETexCoord2fv;
       gen->TexCoord2f = (void *) r200_makeSSETexCoord2f;
       gen->MultiTexCoord2fvARB = (void *) r200_makeSSEMultiTexCoord2fv;
       gen->MultiTexCoord2fARB = (void *) r200_makeSSEMultiTexCoord2f;
-#endif
    }
 }
 
Index: src/mesa/drivers/dri/r200/r200_vtxfmt_x86.c
===================================================================
RCS file: /cvs/mesa/Mesa/src/mesa/drivers/dri/r200/r200_vtxfmt_x86.c,v
retrieving revision 1.4
diff -w -u -d -r1.4 r200_vtxfmt_x86.c
--- a/src/mesa/drivers/dri/r200/r200_vtxfmt_x86.c       5 May 2004 20:16:17 -0000       1.4
+++ b/src/mesa/drivers/dri/r200/r200_vtxfmt_x86.c       5 May 2004 22:19:34 -0000
@@ -40,12 +40,13 @@
 
 #if defined(USE_X86_ASM)
 
+static struct dynfn *r200_makeX86MultiTexCoord3fARB( GLcontext *, const int * );
+static struct dynfn *r200_makeX86MultiTexCoord3fvARB( GLcontext *, const int * );
+
 #define EXTERN( FUNC )         \
 extern const char *FUNC;       \
 extern const char *FUNC##_end
 
-EXTERN ( _x86_Attribute2fv );
-EXTERN ( _x86_Attribute2f );
 EXTERN ( _x86_Attribute3fv );
 EXTERN ( _x86_Attribute3f );
 EXTERN ( _x86_Vertex3fv_6 );
@@ -57,10 +58,62 @@
 EXTERN ( _x86_Color4ubv_ub );
 EXTERN ( _x86_Color4ubv_4f );
 EXTERN ( _x86_Color4ub_ub );
-EXTERN ( _x86_MultiTexCoord2fv );
-EXTERN ( _x86_MultiTexCoord2fv_2 );
-EXTERN ( _x86_MultiTexCoord2f );
-EXTERN ( _x86_MultiTexCoord2f_2 );
+
+struct codegen_stub {
+   uint32_t    size;           /**< Size, in bytes, of the code stub. */
+   uint32_t    num_fixups;      /**< Number of fix-up entries. */
+   struct {
+      uint32_t bytes;           /**< Size, in bytes, of the fix-up. */
+      uint32_t offset;          /**< Offset into the code of fix-up. */
+      uint32_t entry;           /**< Entry in data table to use for fix-up. */
+   } fixups[0];
+};
+
+extern struct codegen_stub _x86_TexCoord2fv_stub;
+extern struct codegen_stub _x86_TexCoord2f_stub;
+
+extern struct codegen_stub _x86_MultiTexCoord1fv_stub;
+extern struct codegen_stub _x86_MultiTexCoord1f_stub;
+extern struct codegen_stub _x86_MultiTexCoord2fv_stub;
+extern struct codegen_stub _x86_MultiTexCoord2f_stub;
+extern struct codegen_stub _x86_MultiTexCoord3fv_stub;
+extern struct codegen_stub _x86_MultiTexCoord3f_stub;
+
+/**
+ * \bug
+ * This routine will need to be modified to support data smaller than
+ * \c intptr_t on big-endian machines.
+ */
+struct dynfn *
+r200_do_codegen( const struct codegen_stub * stub, struct dynfn * cache,
+                const int *key, const intptr_t * data, 
+                const char * func_name )
+{
+   struct dynfn * dfn = MALLOC_STRUCT( dynfn );
+   const char   * code_base;
+   unsigned       i;
+
+
+   if (R200_DEBUG & DEBUG_CODEGEN)
+      fprintf(stderr, "%s 0x%08x 0x%08x\n", func_name, key[0], key[1] );
+
+   code_base = (const char *) & stub->fixups[ stub->num_fixups ];
+
+   insert_at_head( cache, dfn );
+   dfn->key[0] = key[0];
+   dfn->key[1] = key[1];
+   dfn->code = ALIGN_MALLOC( stub->size, 16 );
+   (void) memcpy( dfn->code, code_base, stub->size );
+
+   for ( i = 0 ; i < stub->num_fixups ; i++ ) {
+      char * fixup_base = dfn->code + stub->fixups[i].offset;
+
+      (void) memcpy( fixup_base, & data[ stub->fixups[i].entry ],
+                    stub->fixups[i].bytes );
+   }
+
+   return dfn;
+}
 
 
 /* Build specialized versions of the immediate calls on the fly for
@@ -180,37 +233,6 @@
    return dfn;
 }
 
-static struct dynfn *
-r200_makeX86Attribute2fv( struct dynfn * cache, const int *key,
-                         const char * name, void * dest )
-{
-   struct dynfn *dfn = MALLOC_STRUCT( dynfn );
-
-   if (R200_DEBUG & DEBUG_CODEGEN)
-      fprintf(stderr, "%s 0x%08x\n", name, key[0] );
-
-   DFN ( _x86_Attribute2fv, (*cache) );
-   FIXUP(dfn->code, 11, 0x0, (int)dest); 
-   FIXUP(dfn->code, 16, 0x4, 4+(int)dest); 
-
-   return dfn;
-}
-
-static struct dynfn *
-r200_makeX86Attribute2f( struct dynfn * cache, const int *key,
-                        const char * name, void * dest )
-{
-   struct dynfn *dfn = MALLOC_STRUCT( dynfn );
-
-   if (R200_DEBUG & DEBUG_CODEGEN)
-      fprintf(stderr, "%s 0x%08x\n", name, key[0] );
-
-   DFN ( _x86_Attribute2f, (*cache) );
-   FIXUP(dfn->code, 1, 0x0, (int)dest); 
-
-   return dfn;
-}
-
 
 static struct dynfn *
 r200_makeX86Attribute3fv( struct dynfn * cache, const int *key,
@@ -336,67 +358,110 @@
 }
 
 
-
-#if 0 /* Temporarily disabled as it is broken w/the new cubemap code. - idr */
 struct dynfn *r200_makeX86TexCoord2fv( GLcontext *ctx, const int *key )
 {
    r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   intptr_t data[2];
 
-   return r200_makeX86Attribute2fv( & rmesa->vb.dfn_cache.TexCoord2fv, key,
-                                   __FUNCTION__, rmesa->vb.texcoordptr[0] );
+   data[0] = (intptr_t) rmesa->vb.texcoordsize;
+   data[1] = (intptr_t) rmesa->vb.texcoordptr;
+
+   return r200_do_codegen( & _x86_TexCoord2fv_stub,
+                     & rmesa->vb.dfn_cache.TexCoord2fv,
+                     key, data, __func__ );
 }
 
 struct dynfn *r200_makeX86TexCoord2f( GLcontext *ctx, const int *key )
 {
    r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   intptr_t data[2];
 
-   return r200_makeX86Attribute2f( & rmesa->vb.dfn_cache.TexCoord2f, key,
-                                  __FUNCTION__, rmesa->vb.texcoordptr[0] );
+   data[0] = (intptr_t) rmesa->vb.texcoordsize;
+   data[1] = (intptr_t) rmesa->vb.texcoordptr;
+
+   return r200_do_codegen( & _x86_TexCoord2f_stub,
+                     & rmesa->vb.dfn_cache.TexCoord2f,
+                     key, data, __func__ );
 }
 
-struct dynfn *r200_makeX86MultiTexCoord2fvARB( GLcontext *ctx, const int *key )
+
+struct dynfn *r200_makeX86MultiTexCoord1fvARB( GLcontext *ctx, const int *key )
 {
-   struct dynfn *dfn = MALLOC_STRUCT( dynfn );
    r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   intptr_t data[2];
 
-   if (R200_DEBUG & DEBUG_CODEGEN)
-      fprintf(stderr, "%s 0x%08x 0x%08x\n", __FUNCTION__, key[0], key[1] );
+   data[0] = (intptr_t) rmesa->vb.texcoordsize;
+   data[1] = (intptr_t) rmesa->vb.texcoordptr;
 
-   if (rmesa->vb.texcoordptr[1] == rmesa->vb.texcoordptr[0]+4) {
-      DFN ( _x86_MultiTexCoord2fv, rmesa->vb.dfn_cache.MultiTexCoord2fvARB );
-      FIXUP(dfn->code, 21, 0xdeadbeef, (int)rmesa->vb.texcoordptr[0]); 
-      FIXUP(dfn->code, 27, 0xdeadbeef, (int)rmesa->vb.texcoordptr[0]+4);
-   } else {
-      DFN ( _x86_MultiTexCoord2fv_2, rmesa->vb.dfn_cache.MultiTexCoord2fvARB );
-      FIXUP(dfn->code, 14, 0x0, (int)rmesa->vb.texcoordptr);
+   return r200_do_codegen( & _x86_MultiTexCoord1fv_stub,
+                     & rmesa->vb.dfn_cache.MultiTexCoord1fvARB,
+                     key, data, __func__ );
    }
-   return dfn;
+
+struct dynfn *r200_makeX86MultiTexCoord1fARB( GLcontext *ctx, const int *key )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   intptr_t data[2];
+
+   data[0] = (intptr_t) rmesa->vb.texcoordsize;
+   data[1] = (intptr_t) rmesa->vb.texcoordptr;
+
+   return r200_do_codegen( & _x86_MultiTexCoord1f_stub,
+                     & rmesa->vb.dfn_cache.MultiTexCoord1fARB,
+                     key, data, __func__ );
 }
 
-struct dynfn *r200_makeX86MultiTexCoord2fARB( GLcontext *ctx, 
-                                             const int *key )
+struct dynfn *r200_makeX86MultiTexCoord2fvARB( GLcontext *ctx, const int *key )
 {
-   struct dynfn *dfn = MALLOC_STRUCT( dynfn );
    r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   intptr_t data[2];
 
-   if (R200_DEBUG & DEBUG_CODEGEN)
-      fprintf(stderr, "%s 0x%08x 0x%08x\n", __FUNCTION__, key[0], key[1] );
+   data[0] = (intptr_t) rmesa->vb.texcoordsize;
+   data[1] = (intptr_t) rmesa->vb.texcoordptr;
 
-   if (rmesa->vb.texcoordptr[1] == rmesa->vb.texcoordptr[0]+4) {
-      DFN ( _x86_MultiTexCoord2f, rmesa->vb.dfn_cache.MultiTexCoord2fARB );
-      FIXUP(dfn->code, 20, 0xdeadbeef, (int)rmesa->vb.texcoordptr[0]); 
-      FIXUP(dfn->code, 26, 0xdeadbeef, (int)rmesa->vb.texcoordptr[0]+4); 
+   return r200_do_codegen( & _x86_MultiTexCoord2fv_stub,
+                     & rmesa->vb.dfn_cache.MultiTexCoord2fvARB,
+                     key, data, __func__ );
    }
-   else {
-      /* Note: this might get generated multiple times, even though the
-       * actual emitted code is the same.
-       */
-      DFN ( _x86_MultiTexCoord2f_2, rmesa->vb.dfn_cache.MultiTexCoord2fARB );
-      FIXUP(dfn->code, 18, 0x0, (int)rmesa->vb.texcoordptr); 
+
+struct dynfn *r200_makeX86MultiTexCoord2fARB( GLcontext *ctx, const int *key )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   intptr_t data[2];
+
+   data[0] = (intptr_t) rmesa->vb.texcoordsize;
+   data[1] = (intptr_t) rmesa->vb.texcoordptr;
+
+   return r200_do_codegen( & _x86_MultiTexCoord2f_stub,
+                     & rmesa->vb.dfn_cache.MultiTexCoord2fARB,
+                     key, data, __func__ );
    }      
-   return dfn;
+
+struct dynfn *r200_makeX86MultiTexCoord3fvARB( GLcontext *ctx, const int *key )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   intptr_t data[2];
+
+   data[0] = (intptr_t) rmesa->vb.texcoordsize;
+   data[1] = (intptr_t) rmesa->vb.texcoordptr;
+
+   return r200_do_codegen( & _x86_MultiTexCoord3fv_stub,
+                     & rmesa->vb.dfn_cache.MultiTexCoord3fvARB,
+                     key, data, __func__ );
+}
+
+struct dynfn *r200_makeX86MultiTexCoord3fARB( GLcontext *ctx, const int *key )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   intptr_t data[2];
+
+   data[0] = (intptr_t) rmesa->vb.texcoordsize;
+   data[1] = (intptr_t) rmesa->vb.texcoordptr;
+
+   return r200_do_codegen( & _x86_MultiTexCoord3f_stub,
+                     & rmesa->vb.dfn_cache.MultiTexCoord3fARB,
+                     key, data, __func__ );
 }
-#endif
 
 void r200InitX86Codegen( struct dfn_generators *gen )
 {
@@ -406,12 +471,14 @@
    gen->Color4ubv = r200_makeX86Color4ubv; /* PKCOLOR only */
    gen->Normal3f = r200_makeX86Normal3f;
    gen->Normal3fv = r200_makeX86Normal3fv;
-#if 0 /* Temporarily disabled as it is broken w/the new cubemap code. - idr */
    gen->TexCoord2f = r200_makeX86TexCoord2f;
    gen->TexCoord2fv = r200_makeX86TexCoord2fv;
+   gen->MultiTexCoord1fARB = r200_makeX86MultiTexCoord1fARB;
+   gen->MultiTexCoord1fvARB = r200_makeX86MultiTexCoord1fvARB;
    gen->MultiTexCoord2fARB = r200_makeX86MultiTexCoord2fARB;
    gen->MultiTexCoord2fvARB = r200_makeX86MultiTexCoord2fvARB;
-#endif
+   gen->MultiTexCoord3fARB = r200_makeX86MultiTexCoord3fARB;
+   gen->MultiTexCoord3fvARB = r200_makeX86MultiTexCoord3fvARB;
    gen->Color3f = r200_makeX86Color3f;
    gen->Color3fv = r200_makeX86Color3fv;
 
@@ -425,8 +492,6 @@
 /*     gen->Color4fv = r200_makeX86Color4fv; */
 /*     gen->TexCoord1f = r200_makeX86TexCoord1f; */
 /*     gen->TexCoord1fv = r200_makeX86TexCoord1fv; */
-/*     gen->MultiTexCoord1fARB = r200_makeX86MultiTexCoord1fARB; */
-/*     gen->MultiTexCoord1fvARB = r200_makeX86MultiTexCoord1fvARB; */
 }
 
 
Index: src/mesa/drivers/dri/r200/r200_vtxtmp_x86.S
===================================================================
RCS file: /cvs/mesa/Mesa/src/mesa/drivers/dri/r200/r200_vtxtmp_x86.S,v
retrieving revision 1.4
diff -w -u -d -r1.4 r200_vtxtmp_x86.S
--- a/src/mesa/drivers/dri/r200/r200_vtxtmp_x86.S       26 Apr 2004 10:10:23 -0000      1.4
+++ b/src/mesa/drivers/dri/r200/r200_vtxtmp_x86.S       5 May 2004 22:19:34 -0000
@@ -30,7 +30,9 @@
 .globl x;              \
 x:
 
-.data
+#define TEX_TARGET_MASK   0x01
+
+.section .rodata
 .align 4
 
 /*
@@ -210,39 +212,8 @@
 
 
 /**
- * Generic handler for 2 float format data.  This can be used for
- * TexCoord2f and possibly other functions.
- */
-
-GLOBL ( _x86_Attribute2f )
-       movl    $0x0, %edx
-       movl    4(%esp), %eax
-       movl    8(%esp), %ecx
-       movl    %eax, (%edx)
-       movl    %ecx, 4(%edx)
-       ret
-GLOBL ( _x86_Attribute2f_end )
-
-
-/**
- * Generic handler for 2 float vector format data.  This can be used for
- * TexCoord2fv and possibly other functions.
- */
-
-GLOBL( _x86_Attribute2fv)
-       movl 4(%esp), %eax      /* load 'v' off stack */
-       movl (%eax), %ecx       /* load v[0] */
-       movl 4(%eax), %eax      /* load v[1] */
-       movl %ecx, 0            /* store v[0] to current vertex */
-       movl %eax, 4            /* store v[1] to current vertex */
-       ret
-GLOBL ( _x86_Attribute2fv_end )
-
-
-/**
  * Generic handler for 3 float format data.  This can be used for
- * Normal3f, Color3f (when the color target is also float), or
- * TexCoord3f.
+ * Normal3f or Color3f (when the color target is also float).
  */
 
 GLOBL ( _x86_Attribute3f )
@@ -257,8 +228,7 @@
 
 /**
  * Generic handler for 3 float vector format data.  This can be used for
- * Normal3f, Color3f (when the color target is also float), or
- * TexCoord3f.
+ * Normal3f or Color3f (when the color target is also float).
  */
 
 GLOBL( _x86_Attribute3fv)
@@ -332,64 +302,279 @@
 GLOBL( _x86_Color4ub_ub_end )
 
 
-/*
-       MultiTexCoord2fv st0/st1
+/**
+ * Generic x86 assembly stub for codegen \c glTexCoord2fv.
 */
-GLOBL( _x86_MultiTexCoord2fv )
-       movl    4(%esp), %eax
-       movl    8(%esp), %ecx
-       and     $1, %eax
+GLOBL( _x86_TexCoord2fv_stub )
+       .long   _x86_TexCoord2fv_end - _x86_TexCoord2fv
+       .long   2
+       .long   4, 6, 0
+       .long   4, 11, 1
+_x86_TexCoord2fv:
+       movl    4(%esp), %ecx
+       movl    0, %edx                 # texcoord_size[0] is 1, 2, or 3
+       movl    0, %eax                 # texcoord_ptr[0]
+
+       decl    %edx
+       jne     .5_2fv
+
+       movl    (%ecx), %ecx
+       movl    %ecx, (%eax)
+       ret
+
+.5_2fv:        decl    %edx
        movl    (%ecx), %edx
-       shl     $3, %eax
        movl    4(%ecx), %ecx
-       movl    %edx, 0xdeadbeef(%eax)
-       movl    %ecx, 0xdeadbeef(%eax)
+       movl    %edx, (%eax)
+       movl    %ecx, 4(%eax)
+       
+       jne     .6_2fv
        ret
-GLOBL( _x86_MultiTexCoord2fv_end )
 
-/*
-       MultiTexCoord2fv
+.6_2fv:        xorl    %edx, %edx
+       movl    %edx, 8(%eax)
+       ret
+_x86_TexCoord2fv_end:
+
+
+/**
+ * Generic x86 assembly stub for codegen \c glTexCoord2f.
 */
+GLOBL( _x86_TexCoord2f_stub )
+       .long   _x86_TexCoord2f_end - _x86_TexCoord2f
+       .long   2
+       .long   4, 6, 0
+       .long   4, 11, 1
+_x86_TexCoord2f:
+       movl    4(%esp), %ecx           # s (first argument)
+       movl    0, %edx                 # texcoord_size[0] is 1, 2, or 3
+       movl    0, %eax                 # texcoord_ptr[0]
 
-GLOBL( _x86_MultiTexCoord2fv_2 )
-       movl    4(%esp,1), %eax
-       movl    8(%esp,1), %ecx
-       and     $0x1, %eax
-       movl    0(,%eax,4), %edx
-       movl    (%ecx), %eax
-       movl    %eax, (%edx)
-       movl    4(%ecx), %eax
-       movl    %eax, 4(%edx)
+       decl    %edx
+       movl    %ecx, (%eax)
+       movl    8(%esp), %ecx           # t (second argument; there is no target arg here)
+       je      .1_2f
+
+       decl    %edx
+       movl    %ecx, 4(%eax)
+       jne     .2_2f
+
+.1_2f: ret
+
+.2_2f: xorl    %edx, %edx
+       movl    %edx, 8(%eax)
        ret
-GLOBL( _x86_MultiTexCoord2fv_2_end )
+_x86_TexCoord2f_end:
 
-/*
-       MultiTexCoord2f st0/st1
+
+/**
+ * Generic x86 assembly stub for codegen \c glMultiTexCoord1fv.
 */
-GLOBL( _x86_MultiTexCoord2f )
+GLOBL( _x86_MultiTexCoord1fv_stub )
+       .long   _x86_MultiTexCoord1fv_end - _x86_MultiTexCoord1fv
+       .long   2
+       .long   4, 14, 0
+       .long   4, 21, 1
+_x86_MultiTexCoord1fv:
        movl    4(%esp), %eax
-       movl    8(%esp), %edx
+       movl    8(%esp), %ecx
+       and     $TEX_TARGET_MASK, %eax
+
+       movl    0(,%eax,4), %edx        # texcoord_size[unit] is 1, 2, or 3
+       movl    0(,%eax,4), %eax        # texcoord_ptr[unit]
+       decl    %edx
+
+       movl    (%ecx), %ecx
+       movl    %ecx, (%eax)
+
+       jne     .1_1fv
+       ret
+
+.1_1fv:        xorl    %ecx, %ecx
+       decl    %edx
+       movl    %ecx, 4(%eax)
+       jne     .2_1fv
+       ret
+
+.2_1fv:        movl    %ecx, 8(%eax)
+       ret
+_x86_MultiTexCoord1fv_end:
+
+
+/**
+ * Generic x86 assembly stub for codegen \c glMultiTexCoord1f.
+ */
+GLOBL( _x86_MultiTexCoord1f_stub )
+       .long   _x86_MultiTexCoord1f_end - _x86_MultiTexCoord1f
+       .long   2
+       .long   4, 14, 0
+       .long   4, 21, 1
+_x86_MultiTexCoord1f:
+       movl    4(%esp), %eax
+       movl    8(%esp), %ecx
+       and     $TEX_TARGET_MASK, %eax
+
+       movl    0(,%eax,4), %edx        # texcoord_size[unit] is 1, 2, or 3
+       movl    0(,%eax,4), %eax        # texcoord_ptr[unit]
+       decl    %edx
+
+       movl    %ecx, (%eax)
+
+       jne     .1_1f
+       ret
+
+.1_1f: xorl    %ecx, %ecx
+       decl    %edx
+       movl    %ecx, 4(%eax)
+       jne     .2_1f
+       ret
+
+.2_1f: movl    %ecx, 8(%eax)
+       ret
+_x86_MultiTexCoord1f_end:
+
+
+/**
+ * Generic x86 assembly stub for codegen \c glMultiTexCoord2fv.
+ */
+GLOBL( _x86_MultiTexCoord2fv_stub )
+       .long   _x86_MultiTexCoord2fv_end - _x86_MultiTexCoord2fv
+       .long   2
+       .long   4, 14, 0
+       .long   4, 21, 1
+_x86_MultiTexCoord2fv:
+       movl    4(%esp), %eax
+       movl    8(%esp), %ecx
+       and     $TEX_TARGET_MASK, %eax
+
+       movl    0(,%eax,4), %edx        # texcoord_size[unit] is 1, 2, or 3
+       movl    0(,%eax,4), %eax        # texcoord_ptr[unit]
+
+       decl    %edx
+       jne     .3_2fv
+
+       movl    (%ecx), %edx
+       movl    %edx, (%eax)
+       ret
+
+.3_2fv:        decl    %edx
+       movl    (%ecx), %edx
+       movl    4(%ecx), %ecx
+       movl    %edx, (%eax)
+       movl    %ecx, 4(%eax)
+       jne     .4_2fv
+       ret
+
+.4_2fv:        xorl    %edx, %edx
+       movl    %edx, 8(%eax)
+       ret
+_x86_MultiTexCoord2fv_end:
+
+
+/**
+ * Generic x86 assembly stub for codegen \c glMultiTexCoord2f.
+ */
+GLOBL( _x86_MultiTexCoord2f_stub )
+       .long   _x86_MultiTexCoord2f_end - _x86_MultiTexCoord2f
+       .long   2
+       .long   4, 14, 0
+       .long   4, 21, 1
+_x86_MultiTexCoord2f:
+       movl    4(%esp), %eax
+       movl    8(%esp), %ecx
+       and     $TEX_TARGET_MASK, %eax
+
+       movl    0(,%eax,4), %edx        # texcoord_size[unit] is 1, 2, or 3
+       movl    0(,%eax,4), %eax        # texcoord_ptr[unit]
+
+       decl    %edx
+       movl    %ecx, (%eax)
        movl    12(%esp), %ecx
-       and     $1, %eax
-       shl     $3, %eax
-       movl    %edx, 0xdeadbeef(%eax)
-       movl    %ecx, 0xdeadbeef(%eax)
+       je      .3_2f
+
+       decl    %edx
+       movl    %ecx, 4(%eax)
+       jne     .4_2f
+
+.3_2f: ret
+
+.4_2f: xorl    %edx, %edx
+       movl    %edx, 8(%eax)
        ret
-GLOBL( _x86_MultiTexCoord2f_end )
+_x86_MultiTexCoord2f_end:
 
-/*
-       MultiTexCoord2f
+
+/**
+ * Generic x86 assembly stub for codegen \c glMultiTexCoord3fv.
 */
-GLOBL( _x86_MultiTexCoord2f_2 )
+GLOBL( _x86_MultiTexCoord3fv_stub )
+       .long   _x86_MultiTexCoord3fv_end - _x86_MultiTexCoord3fv
+       .long   2
+       .long   4, 14, 0
+       .long   4, 21, 1
+_x86_MultiTexCoord3fv:
        movl    4(%esp), %eax
-       movl    8(%esp), %edx
-       movl    12(%esp,1), %ecx
-       and     $1,%eax
-       movl    0(,%eax,4), %eax
+       movl    8(%esp), %ecx
+       and     $TEX_TARGET_MASK, %eax
+
+       movl    0(,%eax,4), %edx        # texcoord_size[unit] is 1, 2, or 3
+       movl    0(,%eax,4), %eax        # texcoord_ptr[unit]
+
+       decl    %edx
+       jne     .5_3fv
+
+       movl    (%ecx), %edx
+       movl    %edx, (%eax)
+       ret
+
+.5_3fv:        decl    %edx
+       movl    (%ecx), %edx
        movl    %edx, (%eax)
+       movl    4(%ecx), %edx
+       movl    %edx, 4(%eax)
+       jne     .6_3fv
+       ret
+
+.6_3fv:        movl    8(%ecx), %edx
+       movl    %edx, 8(%eax)
+       ret
+_x86_MultiTexCoord3fv_end:
+
+
+/**
+ * Generic x86 assembly stub for codegen \c glMultiTexCoord3f.
+ */
+GLOBL( _x86_MultiTexCoord3f_stub )
+       .long   _x86_MultiTexCoord3f_end - _x86_MultiTexCoord3f
+       .long   2
+       .long   4, 14, 0
+       .long   4, 21, 1
+_x86_MultiTexCoord3f:
+       movl    4(%esp), %eax
+       movl    8(%esp), %ecx
+       and     $TEX_TARGET_MASK, %eax
+
+       movl    0(,%eax,4), %edx        # texcoord_size[unit] is 1, 2, or 3
+       movl    0(,%eax,4), %eax        # texcoord_ptr[unit]
+
+       decl    %edx
+
+       movl    %ecx, (%eax)
+       movl    12(%esp), %ecx
+
+       je      .5_3f
+
+       decl    %edx
+       movl    16(%esp), %edx
        movl    %ecx, 4(%eax)
+       jne     .6_3f
+
+.5_3f: ret
+
+.6_3f: movl    %edx, 8(%eax)
        ret
-GLOBL( _x86_MultiTexCoord2f_2_end )
+_x86_MultiTexCoord3f_end:
+
 
 #if defined(USE_SSE_ASM)
 /**
@@ -421,74 +606,133 @@
 
 
 /**
- * Generic handler for 2 float vector format data.  This can be used for
- * TexCoord2fv and possibly other functions.
+ * SSE optimized assembly stub for codegen \c glTexCoord2fv.
  */
+GLOBL( _sse_TexCoord2fv_stub )
+       .long   _sse_TexCoord2fv_end - _sse_TexCoord2fv
+       .long   2
+       .long   4, 6, 0
+       .long   4, 11, 1
+_sse_TexCoord2fv:
+       movl    4(%esp), %ecx
+       movl    0, %edx                 # texcoord_size[0] is 1, 2, or 3
+       movl    0, %eax                 # texcoord_ptr[0]
 
-GLOBL( _sse_Attribute2fv )
-       movl    4(%esp), %eax
-       movlps  (%eax), %xmm0
-       movlps  %xmm0, 0
+       movlps  (%ecx), %xmm0
+       xorl    %ecx, %ecx
+
+       decl    %edx
+       je      .b_2fv
+
+       decl    %edx
+       movlps  %xmm0, (%eax)
+       jne     .c_2fv
        ret
-GLOBL( _sse_Attribute2fv_end )
+
+.b_2fv:        movss   %xmm0, (%eax)
+       ret
+
+.c_2fv:        movl    %ecx, 8(%eax)
+       ret
+_sse_TexCoord2fv_end:
 
 
 /**
- * Generic handler for 2 float format data.  This can be used for
- * TexCoord2f and possibly other functions.
+ * SSE optimized assembly stub for codegen \c glTexCoord2f.
  */
-
-GLOBL( _sse_Attribute2f )
+GLOBL( _sse_TexCoord2f_stub )
+       .long   _sse_TexCoord2f_end - _sse_TexCoord2f
+       .long   2
+       .long   4, 7, 0
+       .long   4, 12, 1
+_sse_TexCoord2f:
        movlps  4(%esp), %xmm0
-       movlps  %xmm0, 0
+       movl    0, %edx                 # texcoord_size[unit] is 1, 2, or 3
+       movl    0, %eax                 # texcoord_ptr[unit]
+
+       xorl    %ecx, %ecx
+
+       decl    %edx
+       je      .d_2fv
+
+       decl    %edx
+       movlps  %xmm0, (%eax)
+       jne     .e_2fv
        ret
-GLOBL( _sse_Attribute2f_end )
 
-/*
-       MultiTexCoord2fv st0/st1
-*/
-GLOBL( _sse_MultiTexCoord2fv )
-       movl    4(%esp), %eax
-       movl    8(%esp), %ecx
-       and     $1, %eax
-       movlps  (%ecx), %xmm0
-       movlps  %xmm0, 0xdeadbeef(,%eax,8)
+.d_2fv:        movss   %xmm0, (%eax)
        ret
-GLOBL( _sse_MultiTexCoord2fv_end )
 
-/*
-       MultiTexCoord2fv
+.e_2fv:        movl    %ecx, 8(%eax)
+       ret
+_sse_TexCoord2f_end:
+
+
+/**
+ * SSE optimized assembly stub for codegen \c glMultiTexCoord2fv.
 */
-GLOBL( _sse_MultiTexCoord2fv_2 )
+GLOBL( _sse_MultiTexCoord2fv_stub )
+       .long   _sse_MultiTexCoord2fv_end - _sse_MultiTexCoord2fv
+       .long   2
+       .long   4, 14, 0
+       .long   4, 21, 1
+_sse_MultiTexCoord2fv:
        movl    4(%esp), %eax
        movl    8(%esp), %ecx
-       and     $0x1, %eax
-       movl    0(,%eax,4), %edx
+       and     $TEX_TARGET_MASK, %eax
+
+       movl    0(,%eax,4), %edx        # texcoord_size[unit] is 1, 2, or 3
+       movl    0(,%eax,4), %eax        # texcoord_ptr[unit]
+
        movlps  (%ecx), %xmm0
-       movlps  %xmm0, (%edx)
+       xorl    %ecx, %ecx
+
+       decl    %edx
+       je      .7_2fv
+
+       decl    %edx
+       movlps  %xmm0, (%eax)
+       jne     .8_2fv
        ret
-GLOBL( _sse_MultiTexCoord2fv_2_end )
 
-/*
-       MultiTexCoord2f st0/st1
-*/
-GLOBL( _sse_MultiTexCoord2f )
-       movl    4(%esp), %eax
-       and     $1, %eax
-       movlps  8(%esp), %xmm0
-       movlps  %xmm0, 0xdeadbeef(,%eax,8)
+.7_2fv:        movss   %xmm0, (%eax)
        ret
-GLOBL( _sse_MultiTexCoord2f_end )
 
-/*
-       MultiTexCoord2f
+.8_2fv:        movl    %ecx, 8(%eax)
+       ret
+_sse_MultiTexCoord2fv_end:
+
+
+/**
+ * SSE optimized assembly stub for codegen \c glMultiTexCoord2f.
 */
-GLOBL( _sse_MultiTexCoord2f_2 )
+GLOBL( _sse_MultiTexCoord2f_stub )
+       .long   _sse_MultiTexCoord2f_end - _sse_MultiTexCoord2f
+       .long   2
+       .long   4, 17, 0
+       .long   4, 24, 1
+_sse_MultiTexCoord2f:
        movl    4(%esp), %eax
        movlps  8(%esp), %xmm0
-       and     $1,%eax
-       movl    0(,%eax,4), %eax
+       and     $TEX_TARGET_MASK, %eax
+
+       xorl    %ecx, %ecx
+
+       movl    0(,%eax,4), %edx        # texcoord_size[unit] is 1, 2, or 3
+       movl    0(,%eax,4), %eax        # texcoord_ptr[unit]
+
+       decl    %edx
+       je      .9_2f
+
+       decl    %edx
        movlps  %xmm0, (%eax)
+       jne     .a_2f
        ret
-GLOBL( _sse_MultiTexCoord2f_2_end )
+
+.9_2f: movss   %xmm0, (%eax)
+       ret
+
+.a_2f: movl    %ecx, 8(%eax)
+       ret
+_sse_MultiTexCoord2f_end:
 #endif

Reply via email to