On Mon, 27 Jun 2005 01:57:56 +0200 Roland Scheidegger <[EMAIL PROTECTED]> wrote:
> Ben Skeggs wrote: > > S3TC does seem to be the killer for UT2004. I started porting over the > > S3TC stuff from the r200 driver a while > > back, but haven't had a lot of time recently to fix a couple of issues > > with it. Overall fps doesn't seem to take a > > huge gain, but the sudden drops to 1-2fps in certain levels > > (CTF-Faceclassic) disappear when S3TC's enabled. > That's true, but to avoid the huge drops you could also just decrease > texture detail. Or implement the second texture heap in main memory and > use gart texturing (though you'd also need to manually increase the gart > size). There are some problems with that for r200, and the strategy for > what textures to put where may not be optimal currently, but the drops > should be gone. > That said, the performance in ut2k4 is probably really slow (apart from > that problem) due to deficiencies in drawArrays handling, at least that > was the case for r200 last time I checked... The first "hack" attempts to improve it. The latter two patches work around the RADEON_BUFFER_SIZE limit. While this actually appears to work, there's no speed boost in general. -- Aapo Tahkola
Index: t_array_api.c
===================================================================
RCS file: /cvs/mesa/Mesa/src/mesa/tnl/t_array_api.c,v
retrieving revision 1.52
diff -u -b -B -u -r1.52 t_array_api.c
--- t_array_api.c 18 Jul 2005 12:31:30 -0000 1.52
+++ t_array_api.c 27 Jul 2005 20:28:16 -0000
@@ -78,21 +78,20 @@
}
-/* Note this function no longer takes a 'start' value, the range is
- * assumed to start at zero. The old trick of subtracting 'start'
- * from each index won't work if the indices are not in writeable
- * memory.
- */
static void _tnl_draw_range_elements( GLcontext *ctx, GLenum mode,
+ GLuint min_index,
GLuint max_index,
GLsizei index_count, GLuint *indices )
{
TNLcontext *tnl = TNL_CONTEXT(ctx);
struct tnl_prim prim;
+ int i;
+ static int size=0;
+ static GLuint *ind=NULL;
FLUSH_CURRENT( ctx, 0 );
- _tnl_vb_bind_arrays( ctx, 0, max_index );
+ _tnl_vb_bind_arrays( ctx, min_index, max_index );
tnl->vb.Primitive = &prim;
tnl->vb.Primitive[0].mode = mode | PRIM_BEGIN | PRIM_END;
@@ -100,8 +99,15 @@
tnl->vb.Primitive[0].count = index_count;
tnl->vb.PrimitiveCount = 1;
- tnl->vb.Elts = (GLuint *)indices;
+ if(index_count > size){
+ size = index_count;
+ free(ind);
+ ind = malloc(index_count * sizeof(GLuint));
+ }
+ for(i=0; i < index_count; i++)
+ ind[i] = indices[i] - min_index;
+ tnl->vb.Elts = ind;
tnl->Driver.RunPipeline( ctx );
}
@@ -297,20 +301,19 @@
* at the whole locked range.
*/
- if (start == 0 && ctx->Array.LockFirst == 0 &&
- end < (ctx->Array.LockFirst + ctx->Array.LockCount))
- _tnl_draw_range_elements( ctx, mode,
+ if (end-start+1 < (ctx->Array.LockFirst + ctx->Array.LockCount)){
+ _tnl_draw_range_elements( ctx, mode, start,
ctx->Array.LockCount,
count, ui_indices );
- else {
+ } else {
fallback_drawelements( ctx, mode, count, ui_indices );
}
}
- else if (start == 0 && end < ctx->Const.MaxArrayLockSize) {
+ else if (end-start+1 < ctx->Const.MaxArrayLockSize) {
/* The arrays aren't locked but we can still fit them inside a
* single vertexbuffer.
*/
- _tnl_draw_range_elements( ctx, mode, end + 1, count, ui_indices );
+ _tnl_draw_range_elements( ctx, mode, start, end + 1, count, ui_indices );
}
else {
/* Range is too big to optimize:
@@ -352,7 +355,7 @@
if (ctx->Array.LockCount) {
if (ctx->Array.LockFirst == 0)
- _tnl_draw_range_elements( ctx, mode,
+ _tnl_draw_range_elements( ctx, mode, 0,
ctx->Array.LockCount,
count, ui_indices );
else
@@ -361,16 +364,18 @@
else {
/* Scan the index list and see if we can use the locked path anyway.
*/
- GLuint max_elt = 0;
+ GLuint max_elt = 0, min_elt = ~0;
GLint i;
- for (i = 0 ; i < count ; i++)
+ for (i = 0 ; i < count ; i++){
if (ui_indices[i] > max_elt)
max_elt = ui_indices[i];
-
- if (max_elt < ctx->Const.MaxArrayLockSize && /* can we use it? */
- max_elt < (GLuint) count) /* do we want to use it? */
- _tnl_draw_range_elements( ctx, mode, max_elt+1, count, ui_indices );
+ if (ui_indices[i] < min_elt)
+ min_elt = ui_indices[i];
+ }
+ if (max_elt-min_elt+1 < ctx->Const.MaxArrayLockSize && /* can we use it?
*/
+ max_elt-min_elt+1 < (GLuint) count) /* do we want to use
it? */
+ _tnl_draw_range_elements( ctx, mode, min_elt, max_elt+1, count,
ui_indices );
else
fallback_drawelements( ctx, mode, count, ui_indices );
}
Index: radeon_cp.c
===================================================================
RCS file: /cvsroot/r300/r300_driver/drm/shared-core/radeon_cp.c,v
retrieving revision 1.11
diff -u -b -B -u -r1.11 radeon_cp.c
--- radeon_cp.c 27 Jun 2005 19:46:30 -0000 1.11
+++ radeon_cp.c 27 Jul 2005 21:33:48 -0000
@@ -1977,6 +1977,7 @@
return DRM_ERR(EBUSY);
}
+#if 0
static int radeon_cp_get_buffers(DRMFILE filp, drm_device_t * dev,
drm_dma_t * d)
{
@@ -2001,7 +2002,70 @@
}
return 0;
}
+#else
+static int radeon_cp_get_buffers(DRMFILE filp, drm_device_t * dev,
+ drm_dma_t * d)
+{
+ int i, start;
+ drm_buf_t *buf;
+ drm_buf_t *buffers[32];
+ drm_radeon_private_t *dev_priv = dev->dev_private;
+ drm_device_dma_t *dma = dev->dma;
+ drm_radeon_buf_priv_t *buf_priv;
+
+ start = dev_priv->last_buf+1;
+ if (start >= dma->buf_count)
+ start = 0;
+ d->granted_count = 0;
+
+ if(d->request_count > 32)
+ return DRM_ERR(EINVAL);
+
+ again:
+ for(i=start; i < dma->buf_count; i++){
+ u32 done_age = GET_SCRATCH(1);
+
+ buf = dma->buflist[i];
+ buf_priv = buf->dev_private;
+ if (buf->filp == 0 || (buf->pending &&
+ buf_priv->age <= done_age)) {
+ buffers[d->granted_count] = buf;
+ d->granted_count++;
+ }else d->granted_count = 0;
+
+ if(d->granted_count == d->request_count)
+ goto done;
+ }
+ if(start != 0){
+ d->granted_count = 0;
+ start = 0;
+ goto again;
+ }
+ done:
+ if(d->granted_count != d->request_count)
+ return DRM_ERR(EBUSY); /* NOTE: broken client */
+
+ dev_priv->last_buf += d->granted_count;
+ if (dev_priv->last_buf >= dma->buf_count)
+ dev_priv->last_buf = 0; /* XXX */
+
+ for(i=0; i < d->granted_count; i++){
+ buffers[i]->pending = 0;
+ buffers[i]->filp = filp;
+
+ if (DRM_COPY_TO_USER(&d->request_indices[i], &buffers[i]->idx,
+ sizeof(buffers[i]->idx)))
+ return DRM_ERR(EFAULT);
+
+ if (DRM_COPY_TO_USER(&d->request_sizes[i], &buffers[i]->total,
+ sizeof(buffers[i]->total)))
+ return DRM_ERR(EFAULT);
+ }
+
+ return 0;
+}
+#endif
int radeon_cp_buffers(DRM_IOCTL_ARGS)
{
DRM_DEVICE;
dma_ext_r300.patch
Description: Binary data
