Same thing as for r200, but for r100. Effects are even better,
according to ipers. Anyone want to do some testing before I commit?
--
Eric Anholt [EMAIL PROTECTED]
http://people.freebsd.org/~anholt/ [EMAIL PROTECTED]
Index: radeon_compat.c
===================================================================
RCS file: /cvs/mesa/Mesa/src/mesa/drivers/dri/radeon/radeon_compat.c,v
retrieving revision 1.3
diff -u -r1.3 radeon_compat.c
--- radeon_compat.c 11 Mar 2004 20:35:41 -0000 1.3
+++ radeon_compat.c 24 Sep 2004 01:49:57 -0000
@@ -202,30 +202,28 @@
static void radeonCompatEmitStateLocked( radeonContextPtr rmesa )
{
- struct radeon_state_atom *state, *tmp;
+ struct radeon_state_atom *atom;
if (RADEON_DEBUG & (DEBUG_STATE|DEBUG_PRIMS))
fprintf(stderr, "%s\n", __FUNCTION__);
- if (rmesa->lost_context) {
- if (RADEON_DEBUG & (DEBUG_STATE|DEBUG_PRIMS|DEBUG_IOCTL))
- fprintf(stderr, "%s - lost context\n", __FUNCTION__);
+ if (!rmesa->hw.is_dirty && !rmesa->hw.all_dirty) /* nothing flagged since the last emit: skip the list walk */
+ return;
- foreach_s( state, tmp, &(rmesa->hw.clean) )
- move_to_tail(&(rmesa->hw.dirty), state );
-
- rmesa->lost_context = 0;
- }
-
- foreach_s( state, tmp, &(rmesa->hw.dirty) ) {
- if (!state->is_tcl)
- radeonCompatEmitPacket( rmesa, state );
- move_to_head( &(rmesa->hw.clean), state );
+ foreach( atom, &rmesa->hw.atomlist ) { /* walk every atom in list order */
+ if ( rmesa->hw.all_dirty ) /* full re-emit requested: treat every atom as dirty */
+ atom->dirty = GL_TRUE;
+ if ( atom->is_tcl ) /* TCL atoms are never emitted on this path */
+ atom->dirty = GL_FALSE;
+ if ( atom->dirty )
+ radeonCompatEmitPacket( rmesa, atom ); /* NOTE(review): atom->dirty is not cleared here, unlike radeonEmitState -- confirm radeonCompatEmitPacket resets it, otherwise the atom is re-emitted on every later call */
}
+
+ rmesa->hw.is_dirty = GL_FALSE; /* reset the context-wide flags only */
+ rmesa->hw.all_dirty = GL_FALSE;
}
-
static void radeonCompatEmitPrimitiveLocked( radeonContextPtr rmesa,
GLuint hw_primitive,
GLuint nverts,
Index: radeon_context.c
===================================================================
RCS file: /cvs/mesa/Mesa/src/mesa/drivers/dri/radeon/radeon_context.c,v
retrieving revision 1.22
diff -u -r1.22 radeon_context.c
--- radeon_context.c 4 Jul 2004 20:33:50 -0000 1.22
+++ radeon_context.c 24 Sep 2004 01:49:57 -0000
@@ -306,7 +306,7 @@
DRI_CONF_TEXTURE_DEPTH_32 : DRI_CONF_TEXTURE_DEPTH_16;
rmesa->swtcl.RenderIndex = ~0;
- rmesa->lost_context = 1;
+ rmesa->hw.all_dirty = GL_TRUE;
/* Set the maximum texture size small enough that we can guarentee that
* all texture units can bind a maximal texture and have them both in
Index: radeon_context.h
===================================================================
RCS file: /cvs/mesa/Mesa/src/mesa/drivers/dri/radeon/radeon_context.h,v
retrieving revision 1.15
diff -u -r1.15 radeon_context.h
--- radeon_context.h 17 Aug 2004 01:41:29 -0000 1.15
+++ radeon_context.h 24 Sep 2004 01:49:57 -0000
@@ -185,6 +185,7 @@
GLuint is_tcl;
int *cmd; /* one or more cmd's */
int *lastcmd; /* one or more cmd's */
+ int *savedcmd; /* one or more cmd's */
GLboolean dirty; /* dirty-mark in emit_state_list */
GLboolean (*check)( GLcontext * ); /* is this state active? */
};
@@ -398,14 +399,11 @@
struct radeon_hw_state {
- /* All state should be on one of these lists:
- */
- struct radeon_state_atom dirty; /* dirty list head placeholder */
- struct radeon_state_atom clean; /* clean list head placeholder */
+ /* Head of the linked list of state atoms. */
+ struct radeon_state_atom atomlist;
/* Hardware state, stored as cmdbuf commands:
* -- Need to doublebuffer for
- * - reviving state after loss of context
* - eliding noop statechange loops? (except line stipple count)
*/
struct radeon_state_atom ctx;
@@ -428,6 +426,7 @@
struct radeon_state_atom txr[2]; /* for NPOT */
int max_state_size; /* Number of bytes necessary for a full state emit. */
+ GLboolean is_dirty, all_dirty;
};
struct radeon_state {
@@ -733,6 +732,7 @@
drm_clip_rect_t *pClipRects;
unsigned int lastStamp;
GLboolean lost_context;
+ GLboolean save_on_next_unlock;
radeonScreenPtr radeonScreen; /* Screen private DRI data */
drm_radeon_sarea_t *sarea; /* Private SAREA data */
Index: radeon_ioctl.c
===================================================================
RCS file: /cvs/mesa/Mesa/src/mesa/drivers/dri/radeon/radeon_ioctl.c,v
retrieving revision 1.13
diff -u -r1.13 radeon_ioctl.c
--- radeon_ioctl.c 17 Aug 2004 20:10:29 -0000 1.13
+++ radeon_ioctl.c 24 Sep 2004 01:59:47 -0000
@@ -59,6 +59,65 @@
static void radeonWaitForIdle( radeonContextPtr rmesa );
+static int radeonFlushCmdBufLocked( radeonContextPtr rmesa,
+ const char * caller );
+
+void radeonSaveHwState( radeonContextPtr rmesa ) /* Copy every atom's current cmd words into its savedcmd buffer. */
+{
+ struct radeon_state_atom *atom;
+
+ foreach( atom, &rmesa->hw.atomlist )
+ memcpy(atom->savedcmd, atom->cmd, atom->cmd_size * 4); /* cmd_size counts ints (see ALLOC_STATE); * 4 assumes 4-byte int */
+}
+
+static void radeonSwapHwState( radeonContextPtr rmesa ) /* Exchange cmd and savedcmd pointers on every atom; a second call restores the original wiring. */
+{
+ int *temp;
+ struct radeon_state_atom *atom;
+
+ foreach( atom, &rmesa->hw.atomlist ) {
+ temp = atom->cmd; /* pointer swap only, no command words are copied */
+ atom->cmd = atom->savedcmd;
+ atom->savedcmd = temp;
+ }
+}
+
+/* At this point we were in FlushCmdBufLocked but we had lost our context, so
+ * we need to unwire our current cmdbuf and hook a new one in, emit that, then
+ * wire the old cmdbuf back in so that FlushCmdBufLocked can continue and the
+ * buffer can depend on the state not being lost across lock/unlock.
+ */
+static void radeonBackUpAndEmitLostStateLocked( radeonContextPtr rmesa )
+{
+ GLuint nr_released_bufs;
+ struct radeon_store store;
+ struct radeon_hw_state temp_state; /* NOTE(review): unused in this function */
+ static int count = 0; /* NOTE(review): unused in this function */
+
+ rmesa->lost_context = GL_FALSE; /* clear first: the nested radeonFlushCmdBufLocked below tests this flag and would otherwise call back into this function */
+
+ nr_released_bufs = rmesa->dma.nr_released_bufs; /* stash the in-progress cmdbuf bookkeeping */
+ store = rmesa->store;
+ rmesa->store.statenr = 0; /* present an empty cmdbuf to the emit below */
+ rmesa->store.primnr = 0;
+ rmesa->store.cmd_used = 0;
+ rmesa->store.elts_start = 0;
+ rmesa->hw.all_dirty = GL_TRUE; /* force every atom to be emitted */
+ radeonSwapHwState( rmesa ); /* cmd now points at the snapshot written by radeonSaveHwState */
+ /* In this case it's okay to EmitState while locked because we won't exhaust
+ * our (empty) cmdbuf.
+ */
+ radeonEmitState( rmesa );
+ radeonFlushCmdBufLocked( rmesa, __FUNCTION__ ); /* NOTE(review): int return value is ignored */
+
+ radeonSwapHwState( rmesa ); /* swap back so cmd is the live state again */
+ /* We've just cleared out the dirty flags, so we don't remember what
+ * actually needed to be emitted for the next state emit.
+ */
+ rmesa->hw.all_dirty = GL_TRUE;
+ rmesa->dma.nr_released_bufs = nr_released_bufs; /* restore the stashed bookkeeping so the interrupted flush can continue */
+ rmesa->store = store;
+}
/* =============================================================
* Kernel command buffer handling
@@ -76,115 +135,95 @@
}
-static void radeon_emit_state_list( radeonContextPtr rmesa,
- struct radeon_state_atom *list )
+/* The state atoms will be emitted in the order they appear in the atom list,
+ * so this step is important.
+ */
+void radeonSetUpAtomList( radeonContextPtr rmesa )
{
- struct radeon_state_atom *state, *tmp;
- char *dest;
- int i, size, texunits;
+ int i, mtu = rmesa->glCtx->Const.MaxTextureUnits;
- /* It appears that some permutations of state atoms lock up the
- * chip. Therefore we make sure that state atoms are emitted in a
- * fixed order. First mark all dirty state atoms and then go
- * through all state atoms in a well defined order and emit only
- * the marked ones.
- * FIXME: This requires knowledge of which state atoms exist.
- * FIXME: Is the zbs hack below still needed?
- */
- size = 0;
- foreach_s( state, tmp, list ) {
- if (state->check( rmesa->glCtx )) {
- size += state->cmd_size;
- state->dirty = GL_TRUE;
- move_to_head( &(rmesa->hw.clean), state );
- if (RADEON_DEBUG & DEBUG_STATE)
- print_state_atom( state );
- }
- else if (RADEON_DEBUG & DEBUG_STATE)
- fprintf(stderr, "skip state %s\n", state->name);
- }
- /* short cut */
- if (!size)
- return;
-
- dest = radeonAllocCmdBuf( rmesa, size * 4, __FUNCTION__);
- texunits = rmesa->glCtx->Const.MaxTextureUnits;
-
-#define EMIT_ATOM(ATOM) \
-do { \
- if (rmesa->hw.ATOM.dirty) { \
- rmesa->hw.ATOM.dirty = GL_FALSE; \
- memcpy( dest, rmesa->hw.ATOM.cmd, rmesa->hw.ATOM.cmd_size * 4); \
- dest += rmesa->hw.ATOM.cmd_size * 4; \
- } \
-} while (0)
-
- EMIT_ATOM (ctx);
- EMIT_ATOM (set);
- EMIT_ATOM (lin);
- EMIT_ATOM (msk);
- EMIT_ATOM (vpt);
- EMIT_ATOM (tcl);
- EMIT_ATOM (msc);
- for (i = 0; i < texunits; ++i) {
- EMIT_ATOM (tex[i]);
- EMIT_ATOM (txr[i]);
- }
- EMIT_ATOM (zbs);
- EMIT_ATOM (mtl);
- for (i = 0; i < 3 + texunits; ++i)
- EMIT_ATOM (mat[i]);
+ make_empty_list(&rmesa->hw.atomlist); /* hw.atomlist is the list head; atoms are appended below in emit order */
+ rmesa->hw.atomlist.name = "atom-list";
+
+ /* Does zbs need to go at the end? */
+ insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.ctx );
+ insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.set );
+ insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.lin );
+ insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.msk );
+ insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vpt );
+ insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.tcl );
+ insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.msc );
+ for (i = 0; i < mtu; ++i) {
+ insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.tex[i] );
+ insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.txr[i] );
+ }
+ insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.zbs );
+ insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.mtl );
+ for (i = 0; i < 3 + mtu; ++i)
+ insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.mat[i] );
for (i = 0; i < 8; ++i)
- EMIT_ATOM (lit[i]);
+ insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.lit[i] );
for (i = 0; i < 6; ++i)
- EMIT_ATOM (ucp[i]);
- EMIT_ATOM (eye);
- EMIT_ATOM (grd);
- EMIT_ATOM (fog);
- EMIT_ATOM (glt);
-
-#undef EMIT_ATOM
+ insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.ucp[i] );
+ insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.eye );
+ insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.grd );
+ insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.fog );
+ insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.glt );
}
-
void radeonEmitState( radeonContextPtr rmesa )
{
- struct radeon_state_atom *state, *tmp;
+ struct radeon_state_atom *atom;
+ char *dest;
+ int i; /* NOTE(review): unused in the new body */
if (RADEON_DEBUG & (DEBUG_STATE|DEBUG_PRIMS))
fprintf(stderr, "%s\n", __FUNCTION__);
- /* Somewhat overkill:
+ if (!rmesa->hw.is_dirty && !rmesa->hw.all_dirty) /* nothing flagged since the last emit */
+ return;
+
+ /* To avoid going across the entire set of states multiple times, just check
+ * for enough space for the case of emitting all state, and inline the
+ * radeonAllocCmdBuf code here without all the checks.
*/
- if (rmesa->lost_context) {
- if (RADEON_DEBUG & (DEBUG_STATE|DEBUG_PRIMS|DEBUG_IOCTL))
- fprintf(stderr, "%s - lost context\n", __FUNCTION__);
-
- foreach_s( state, tmp, &(rmesa->hw.clean) )
- move_to_tail(&(rmesa->hw.dirty), state );
-
- rmesa->lost_context = 0;
- }
- else if (1) {
- /* This is a darstardly kludge to work around a lockup that I
- * haven't otherwise figured out.
- */
- move_to_tail(&(rmesa->hw.dirty), &(rmesa->hw.zbs) );
+ radeonEnsureCmdBufSpace( rmesa, rmesa->hw.max_state_size );
+ dest = rmesa->store.cmd_buf + rmesa->store.cmd_used; /* write position: cmd_used bytes past the start of cmd_buf */
+
+ if (RADEON_DEBUG & DEBUG_STATE) { /* debug-only pass: report which atoms will be emitted or skipped */
+ foreach( atom, &rmesa->hw.atomlist ) {
+ if ( atom->dirty || rmesa->hw.all_dirty ) {
+ if ( atom->check( rmesa->glCtx ) )
+ print_state_atom( atom );
+ else
+ fprintf(stderr, "skip state %s\n", atom->name);
+ }
+ }
}
- if (!(rmesa->radeonScreen->chipset & RADEON_CHIPSET_TCL)) {
- foreach_s( state, tmp, &(rmesa->hw.dirty) ) {
- if (state->is_tcl) {
- move_to_head( &(rmesa->hw.clean), state );
- }
- }
+ foreach( atom, &rmesa->hw.atomlist ) { /* emit pass, in atom-list order */
+ if ( rmesa->hw.all_dirty )
+ atom->dirty = GL_TRUE;
+ if ( !(rmesa->radeonScreen->chipset & RADEON_CHIPSET_TCL) && /* chipsets without the TCL flag never emit TCL atoms */
+ atom->is_tcl )
+ atom->dirty = GL_FALSE;
+ if ( atom->dirty ) {
+ if ( atom->check( rmesa->glCtx ) ) { /* an atom whose check() fails keeps its dirty flag */
+ int size = atom->cmd_size * 4; /* size in bytes; cmd_size counts ints, 4-byte int assumed */
+ memcpy( dest, atom->cmd, size);
+ dest += size;
+ rmesa->store.cmd_used += size;
+ atom->dirty = GL_FALSE;
+ }
+ }
}
- radeon_emit_state_list( rmesa, &rmesa->hw.dirty );
+ assert( rmesa->store.cmd_used <= RADEON_CMD_BUF_SZ ); /* checked only after the copies above have been made */
+
+ rmesa->hw.is_dirty = GL_FALSE;
+ rmesa->hw.all_dirty = GL_FALSE;
}
-
-
/* Fire a section of the retained (indexed_verts) buffer as a regular
* primtive.
*/
@@ -491,6 +530,9 @@
int ret, i;
drm_radeon_cmd_buffer_t cmd;
+ if (rmesa->lost_context)
+ radeonBackUpAndEmitLostStateLocked( rmesa );
+
if (RADEON_DEBUG & DEBUG_IOCTL) {
fprintf(stderr, "%s from %s\n", __FUNCTION__, caller);
@@ -544,18 +586,7 @@
rmesa->store.statenr = 0;
rmesa->store.cmd_used = 0;
rmesa->dma.nr_released_bufs = 0;
- /* Set lost_context so that the first state emit on the new buffer is a full
- * one. This is because the context might get lost while preparing the next
- * buffer, and when we lock and find out, we don't have the information to
- * recreate the state. This function should always be called before the new
- * buffer is begun, so it's sufficient to just set lost_context here.
- *
- * The alternative to this would be to copy out the state on unlock
- * (approximately) and if we did lose the context, dispatch a cmdbuf to reset
- * the state to that old copy before continuing with the accumulated command
- * buffer.
- */
- rmesa->lost_context = 1;
+ rmesa->save_on_next_unlock = 1;
return ret;
}
@@ -897,6 +928,7 @@
}
rmesa->swap_ust = ust;
+ rmesa->hw.all_dirty = GL_TRUE;
}
void radeonPageFlip( const __DRIdrawablePrivate *dPriv )
@@ -1028,13 +1060,6 @@
cx += dPriv->x;
cy = dPriv->y + dPriv->h - cy - ch;
- /* We have to emit state along with the clear, since the kernel relies on
- * some of it. The EmitState that was above RADEON_FIREVERTICES was an
- * attempt to do that, except that another context may come in and cause us
- * to lose our context while we're unlocked.
- */
- radeonEmitState( rmesa );
-
LOCK_HARDWARE( rmesa );
/* Throttle the number of clear ioctls we do.
@@ -1146,6 +1171,7 @@
}
UNLOCK_HARDWARE( rmesa );
+ rmesa->hw.all_dirty = GL_TRUE;
}
@@ -1189,8 +1215,7 @@
if (rmesa->dma.flush)
rmesa->dma.flush( rmesa );
- if (!is_empty_list(&rmesa->hw.dirty))
- radeonEmitState( rmesa );
+ radeonEmitState( rmesa );
if (rmesa->store.cmd_used)
radeonFlushCmdBuf( rmesa, __FUNCTION__ );
Index: radeon_ioctl.h
===================================================================
RCS file: /cvs/mesa/Mesa/src/mesa/drivers/dri/radeon/radeon_ioctl.h,v
retrieving revision 1.3
diff -u -r1.3 radeon_ioctl.h
--- radeon_ioctl.h 17 Aug 2004 01:41:29 -0000 1.3
+++ radeon_ioctl.h 24 Sep 2004 01:49:57 -0000
@@ -104,6 +104,9 @@
extern void radeonInitIoctlFuncs( GLcontext *ctx );
extern void radeonGetAllParams( radeonContextPtr rmesa );
+extern void radeonSaveHwState( radeonContextPtr rmesa );
+extern void radeonSetUpAtomList( radeonContextPtr rmesa );
+
/* radeon_compat.c:
*/
extern void radeonCompatEmitPrimitive( radeonContextPtr rmesa,
@@ -111,7 +114,6 @@
GLuint hw_primitive,
GLuint nrverts );
-
/* ================================================================
* Helper macros:
*/
@@ -130,7 +132,8 @@
#define RADEON_STATECHANGE( rmesa, ATOM ) \
do { \
RADEON_NEWPRIM( rmesa ); \
- move_to_head( &(rmesa->hw.dirty), &(rmesa->hw.ATOM)); \
+ rmesa->hw.ATOM.dirty = GL_TRUE; \
+ rmesa->hw.is_dirty = GL_TRUE; \
} while (0)
#define RADEON_DB_STATE( ATOM ) \
@@ -144,7 +147,8 @@
if (memcmp(atom->cmd, atom->lastcmd, atom->cmd_size*4)) {
int *tmp;
RADEON_NEWPRIM( rmesa );
- move_to_head( &(rmesa->hw.dirty), atom );
+ atom->dirty = GL_TRUE;
+ rmesa->hw.is_dirty = GL_TRUE;
tmp = atom->cmd;
atom->cmd = atom->lastcmd;
atom->lastcmd = tmp;
Index: radeon_lock.c
===================================================================
RCS file: /cvs/mesa/Mesa/src/mesa/drivers/dri/radeon/radeon_lock.c,v
retrieving revision 1.6
diff -u -r1.6 radeon_lock.c
--- radeon_lock.c 21 Mar 2004 17:05:04 -0000 1.6
+++ radeon_lock.c 24 Sep 2004 01:49:57 -0000
@@ -124,4 +124,6 @@
DRI_AGE_TEXTURES( rmesa->texture_heaps[ i ] );
}
}
+
+ rmesa->lost_context = GL_TRUE;
}
Index: radeon_lock.h
===================================================================
RCS file: /cvs/mesa/Mesa/src/mesa/drivers/dri/radeon/radeon_lock.h,v
retrieving revision 1.4
diff -u -r1.4 radeon_lock.h
--- radeon_lock.h 17 Aug 2004 20:10:29 -0000 1.4
+++ radeon_lock.h 24 Sep 2004 01:49:57 -0000
@@ -105,6 +105,10 @@
rmesa->dri.hwLock, \
rmesa->dri.hwContext ); \
DEBUG_RESET(); \
+ if (rmesa->save_on_next_unlock) { \
+ radeonSaveHwState( rmesa ); \
+ rmesa->save_on_next_unlock = GL_FALSE; \
+ } \
} while (0)
#endif
Index: radeon_state_init.c
===================================================================
RCS file: /cvs/mesa/Mesa/src/mesa/drivers/dri/radeon/radeon_state_init.c,v
retrieving revision 1.8
diff -u -r1.8 radeon_state_init.c
--- radeon_state_init.c 17 Aug 2004 01:41:29 -0000 1.8
+++ radeon_state_init.c 24 Sep 2004 02:07:27 -0000
@@ -59,8 +59,9 @@
fprintf(stderr, msg);
fprintf(stderr, ": ");
- foreach(l, &(rmesa->hw.dirty)) {
- fprintf(stderr, "%s, ", l->name);
+ foreach(l, &rmesa->hw.atomlist) {
+ if (l->dirty || rmesa->hw.all_dirty)
+ fprintf(stderr, "%s, ", l->name);
}
fprintf(stderr, "\n");
@@ -197,11 +198,6 @@
rmesa->state.pixel.readOffset = rmesa->state.color.drawOffset;
rmesa->state.pixel.readPitch = rmesa->state.color.drawPitch;
- /* Initialize lists:
- */
- make_empty_list(&(rmesa->hw.dirty));
- make_empty_list(&(rmesa->hw.clean));
-
rmesa->hw.max_state_size = 0;
#define ALLOC_STATE( ATOM, CHK, SZ, NM, FLAG ) \
@@ -209,10 +205,11 @@
rmesa->hw.ATOM.cmd_size = SZ; \
rmesa->hw.ATOM.cmd = (int *)CALLOC(SZ * sizeof(int)); \
rmesa->hw.ATOM.lastcmd = (int *)CALLOC(SZ * sizeof(int)); \
+ rmesa->hw.ATOM.savedcmd = (int *)CALLOC(SZ * sizeof(int)); \
rmesa->hw.ATOM.name = NM; \
rmesa->hw.ATOM.is_tcl = FLAG; \
rmesa->hw.ATOM.check = check_##CHK; \
- insert_at_head(&(rmesa->hw.dirty), &(rmesa->hw.ATOM)); \
+ rmesa->hw.ATOM.dirty = GL_TRUE; \
rmesa->hw.max_state_size += SZ * sizeof(int); \
} while (0)
@@ -256,6 +253,7 @@
ALLOC_STATE( txr[0], txr0, TXR_STATE_SIZE, "TXR/txr-0", 0 );
ALLOC_STATE( txr[1], txr1, TXR_STATE_SIZE, "TXR/txr-1", 0 );
+ radeonSetUpAtomList( rmesa );
/* Fill in the packet headers:
*/
@@ -552,4 +550,7 @@
rmesa->hw.eye.cmd[EYE_Y] = 0;
rmesa->hw.eye.cmd[EYE_Z] = IEEE_ONE;
rmesa->hw.eye.cmd[EYE_RESCALE_FACTOR] = IEEE_ONE;
+
+ radeonSaveHwState( rmesa );
+ rmesa->hw.all_dirty = GL_TRUE;
}