On 2002.05.16 21:37 José Fonseca wrote:
> ... I'm gonna devise a test for that: it will monitor the BM_GUI_TABLE
> value and change the value at the last moment (like waiting for the
> train to come to cross the line!)
>
> ...
Here is the commented output of such a test, which waits for the card to
reach the n-2 entry of an n-entry descriptor table before changing it:
this timeout is the number of loops spent waiting for the card to reach
the n-2 entry. (n was 129)
<6>[drm] timeout = 0x00000085
this is the contents of the registers. COMMAND was read last so it is
the value immediately before the change
<6>[drm] GUI_TABLE = 0x002983f0 COMMAND = 0x40000000
*the change to the n entry (0x00298410) happened now*
this is the contents of the registers. COMMAND was read first so it is
the value immediately after the change.
<6>[drm] GUI_TABLE = 0x00298410 COMMAND = 0xc0000018
it was too late, as COMMAND has the last bit set. as you can see above,
all this wasn't as fast as I initially thought, since the n-1 entry
(0x00298400) was processed and didn't get recorded.
this is the rest of the history
<6>[drm] GUI_TABLE = 0x00298410 COMMAND = 0xc0000000
this is the contents of the registers written in the DMA buffers
<6>[drm] (After DMA Transfer) reg 0 = 0x22223333
<6>[drm] (After DMA Transfer) reg 1 = 0x22223333
<6>[drm] (After DMA Transfer) reg 2 = 0x22223333
the register values tell if the n+1 got processed or not
Now the same test as above but waiting for the n-3 entry:
<6>[drm] timeout = 0x00000084
the n-3 entry (0x000803e0)
<6>[drm] GUI_TABLE = 0x000803e0 COMMAND = 0x40000000
*the change to the n entry (0x00298410) happened now*
the n-1 entry (0x00080400), again during the change the card advanced
one entry
<6>[drm] GUI_TABLE = 0x00080400 COMMAND = 0x40000018
this time it was in time. last bit of COMMAND wasn't set!!
the rest of the history
<6>[drm] GUI_TABLE = 0x00080410 COMMAND = 0x40000000
<6>[drm] GUI_TABLE = 0x00080420 COMMAND = 0xc0000000
you can see it advance to the n+1 entry!!
<6>[drm] (After DMA Transfer) reg 0 = 0x33331111
bingo: the register values were different
So both tests are successful in showing that there is no buffering of the
descriptor table and that the END_OF_LIST_STATUS@BM_COMMAND is *almost*
enough to determine if the change was successful!
The "almost" is because there is one remote, yet possible, race
condition that we need to account for. At least in this test (whose code
had memory references during the change) the card processed a buffer (which
is only 24 bytes) during the change. This means that with slower
computers, if we stitch just one more entry to a very small buffer (as
was done here), the card may finish processing our new buffer even before we
read BM_COMMAND. This would make us think that the buffer wasn't
processed and we would submit it twice.
To resolve this race condition it's not really necessary to have scratch
registers with buffer aging. It's enough to wait and see in which
GUI_TABLE the card stops.
In summary, we can proceed with implementing these ideas, which will allow
us to keep the card running continuously, since there is no indication
whatsoever that this can fail.
I also restate here that I believe that this ability of the chip was
surely a design option, otherwise there would be no reason for a circular
descriptor table and this table would probably also be cached. I might
contact ATI to discuss this later on.
Again, this test code is attached in case anyone wants to review it.
Regards,
José Fonseca
/*
 * Bus-master DMA experiment: change a descriptor-table entry while the
 * engine is already walking the table, and record what the hardware does.
 *
 * Sequence:
 *   1. Allocate three data buffers from dev_priv->pool and a private 32KB
 *      descriptor table (table2).
 *   2. Fill each data buffer with a 3-register write to VERTEX_1_S/T/W
 *      followed by a SRC_CNTL write.
 *   3. Build table2: 64 descriptors pointing at data, then one pointing at
 *      data2 and one pointing at data3, both with bit 0x80000000 set in the
 *      command word (presumably the end-of-list flag -- TODO confirm
 *      against the register reference).
 *   4. Start the transfer, spin until BM_GUI_TABLE reaches descriptor j-1
 *      (or 0x100000 iterations elapse), then rewrite the command word of
 *      descriptor j without bit 0x80000000, sampling BM_GUI_TABLE and
 *      BM_COMMAND immediately before and after the write.
 *   5. Keep sampling both registers, wait for idle, print the recorded
 *      history and compare VERTEX_1_S/T/W with the values from data2.
 *
 * NOTE(review): the sample history is stored in the data buffers at word
 * indices 64..1023, which assumes every pool buffer is at least 4KB --
 * confirm against the pci_pool creation parameters.
 *
 * Returns 0 when the registers hold the data2 values, -ENOMEM when an
 * allocation fails, the non-zero result of mach64_do_wait_for_idle() when
 * the engine does not go idle, and -1 when a register value differs from
 * the expected one. All DMA memory is released on every path.
 */
static int mach64_bm_dma_test( drm_device_t *dev )
{
	drm_mach64_private_t *dev_priv = dev->dev_private;
	/* Handles start at 0 so the failure check below never reads garbage. */
	dma_addr_t data_handle = 0, data2_handle = 0, data3_handle = 0, table2_handle = 0;
	void *cpu_addr_data, *cpu_addr_data2, *cpu_addr_data3, *cpu_addr_table2;
	u32 data_addr, data2_addr, data3_addr, table2_addr;
	u32 *table, *data, *table2, *data2, *data3;
	u32 regs[3], expected[3];
	int i, j;
	int ret = 0;	/* kept separate from the loop counters */

	DRM_DEBUG( "%s\n", __FUNCTION__ );

	table = (u32 *) dev_priv->cpu_addr_table;

	/* FIXME: get a dma buffer from the freelist here rather than using the pool */
	DRM_DEBUG( "Allocating data memory ...\n" );
	cpu_addr_data = pci_pool_alloc( dev_priv->pool, SLAB_ATOMIC, &data_handle );
	cpu_addr_data2 = pci_pool_alloc( dev_priv->pool, SLAB_ATOMIC, &data2_handle );
	cpu_addr_data3 = pci_pool_alloc( dev_priv->pool, SLAB_ATOMIC, &data3_handle );
	cpu_addr_table2 = pci_alloc_consistent( NULL, 32*1024, &table2_handle );
	if (!cpu_addr_data || !data_handle ||
	    !cpu_addr_data2 || !data2_handle ||
	    !cpu_addr_data3 || !data3_handle ||
	    !cpu_addr_table2 || !table2_handle) {
		DRM_INFO( "data-memory allocation failed!\n" );
		/* Release whichever allocations did succeed. */
		ret = -ENOMEM;
		goto out_free;
	}

	data = (u32 *) cpu_addr_data;
	data_addr = (u32) data_handle;
	data2 = (u32 *) cpu_addr_data2;
	data2_addr = (u32) data2_handle;
	data3 = (u32 *) cpu_addr_data3;
	data3_addr = (u32) data3_handle;
	table2 = (u32 *) cpu_addr_table2;
	table2_addr = (u32) table2_handle;

	/* Clear the registers the test is going to write through DMA. */
	MACH64_WRITE( MACH64_SRC_CNTL, 0x00000000 );
	MACH64_WRITE( MACH64_VERTEX_1_S, 0x00000000 );
	MACH64_WRITE( MACH64_VERTEX_1_T, 0x00000000 );
	MACH64_WRITE( MACH64_VERTEX_1_W, 0x00000000 );

	for (i=0; i < 3; i++) {
		DRM_DEBUG( "(before dma transfer) reg %d = 0x%08x\n", i,
			   MACH64_READ( (MACH64_VERTEX_1_S + i*4) ) );
	}

	/* 1_90 = vertex_1_s, setup 3 sequential reg writes */
	/* use only s,t,w vertex registers so we don't have to mask any results */
	data[0] = cpu_to_le32(0x00020190);
	data[1] = 0x11111111;
	data[2] = 0x11112222;
	data[3] = 0x11113333;
	data[4] = cpu_to_le32(0x0000006d); /* src_cntl */
	data[5] = 0x00000000;

	/* The values in data2 are the ones the final check expects. */
	data2[0] = cpu_to_le32(0x00020190);
	data2[1] = expected[0] = 0x22223333;
	data2[2] = expected[1] = 0x22223333;
	data2[3] = expected[2] = 0x22223333;
	data2[4] = cpu_to_le32(0x0000006d); /* src_cntl */
	data2[5] = 0x00000000;

	data3[0] = cpu_to_le32(0x00020190);
	data3[1] = 0x33331111;
	data3[2] = 0x33332222;
	data3[3] = 0x33333333;
	data3[4] = cpu_to_le32(0x0000006d); /* src_cntl */
	data3[5] = 0x00000000;

	DRM_DEBUG( "preparing table ...\n" );
	/* Descriptors 0..63 all point at the first data buffer. */
	for (j = 0; j < 64; ++j) {
		table2[j*4+0] = cpu_to_le32(MACH64_BM_ADDR + APERTURE_OFFSET);
		table2[j*4+1] = cpu_to_le32(data_addr);
		table2[j*4+2] = cpu_to_le32(6 * sizeof( u32 ) | 0x40000000);
		table2[j*4+3] = 0;
	}

	/* j == 64 here: descriptor 64 -> data2, descriptor 65 -> data3. */
	table2[j*4+0] = cpu_to_le32(MACH64_BM_ADDR + APERTURE_OFFSET);
	table2[j*4+1] = cpu_to_le32(data2_addr);
	table2[j*4+2] = cpu_to_le32(6 * sizeof( u32 ) | 0x80000000 | 0x40000000);
	table2[j*4+3] = 0;

	table2[j*4+4] = cpu_to_le32(MACH64_BM_ADDR + APERTURE_OFFSET);
	table2[j*4+5] = cpu_to_le32(data3_addr);
	table2[j*4+6] = cpu_to_le32(6 * sizeof( u32 ) | 0x80000000 | 0x40000000);
	table2[j*4+7] = 0;

	DRM_DEBUG( "table[0] = 0x%08x\n", table[0] );
	DRM_DEBUG( "table[1] = 0x%08x\n", table[1] );
	DRM_DEBUG( "table[2] = 0x%08x\n", table[2] );
	DRM_DEBUG( "table[3] = 0x%08x\n", table[3] );

	for ( i = 0 ; i < 6 ; i++) {
		DRM_DEBUG( " data[%d] = 0x%08x\n", i, data[i] );
	}

	mach64_flush_write_combine();

	DRM_DEBUG( "waiting for idle...\n" );
	ret = mach64_do_wait_for_idle( dev_priv );
	if ( ret ) {
		DRM_INFO( "mach64_do_wait_for_idle failed (result=%d)\n", ret);
		DRM_INFO( "resetting engine ...\n");
		mach64_do_engine_reset( dev );
		goto out_free;
	}
	DRM_DEBUG( "waiting for idle...done\n" );

	DRM_DEBUG( "BUS_CNTL = 0x%08x\n", MACH64_READ( MACH64_BUS_CNTL ) );
	DRM_DEBUG( "SRC_CNTL = 0x%08x\n", MACH64_READ( MACH64_SRC_CNTL ) );
	DRM_DEBUG( "\n" );
	DRM_DEBUG( "data bus addr = 0x%08x\n", data_addr );
	DRM_DEBUG( "table bus addr = 0x%08x\n", dev_priv->table_addr );

	DRM_INFO( "starting DMA transfer...\n" );
	MACH64_WRITE( MACH64_BM_GUI_TABLE,
		      /*dev_priv->table_addr |*/
		      table2_addr |
		      MACH64_CIRCULAR_BUF_SIZE_16KB );

	MACH64_WRITE( MACH64_SRC_CNTL,
		      MACH64_SRC_BM_ENABLE | MACH64_SRC_BM_SYNC |
		      MACH64_SRC_BM_OP_SYSTEM_TO_REG );

	/* Kick off the transfer */
	DRM_DEBUG( "starting DMA transfer... done.\n" );
	MACH64_WRITE( MACH64_DST_HEIGHT_WIDTH, 0 );

	/*
	 * Spin until BM_GUI_TABLE shows descriptor j-1 (16 bytes per
	 * descriptor), giving up after 0x100000 polls.
	 */
	i = 0;
	while (MACH64_READ( MACH64_BM_GUI_TABLE ) !=
	       ((table2_addr + 4*(j-1)*4) | MACH64_CIRCULAR_BUF_SIZE_16KB ) &&
	       i < 0x100000)
		++i;

	/*
	 * Timing-critical section: sample, rewrite descriptor j's command
	 * word without bit 0x80000000, sample again. COMMAND is read last
	 * before the write and first after it, so the two COMMAND samples
	 * bracket the change as tightly as possible.
	 */
	data2[64] = MACH64_READ( MACH64_BM_GUI_TABLE );
	data3[64] = MACH64_READ( MACH64_BM_COMMAND );

	table2[j*4+2] = cpu_to_le32(6 * sizeof( u32 ) | 0x40000000);

	data3[65] = MACH64_READ( MACH64_BM_COMMAND );
	data2[65] = MACH64_READ( MACH64_BM_GUI_TABLE );

	/* Disabled alternative descriptor rewrites, compiled out. */
#if 0
	table2[j*4+4] = cpu_to_le32(MACH64_BM_ADDR + APERTURE_OFFSET);
	table2[j*4+5] = cpu_to_le32(data2_addr);
	table2[j*4+6] = cpu_to_le32(6 * sizeof( u32 ) | 0x80000000 | 0x40000000);
	table2[j*4+7] = 0;

	table2[0*4+1] = cpu_to_le32(MACH64_BM_ADDR + APERTURE_OFFSET);
	table2[0*4+2] = cpu_to_le32(data3_addr);
	table2[0*4+3] = cpu_to_le32(6 * sizeof( u32 ) | 0x80000000 | 0x40000000);
	table2[0*4+4] = 0;

	/* Overwrite the last descriptor of table */
	/*table2[j*4+0] = cpu_to_le32(MACH64_BM_ADDR + APERTURE_OFFSET);*/
	/*table2[j*4+1] = cpu_to_le32(data_addr);*/
	table2[j*4+2] = cpu_to_le32(6 * sizeof( u32 ) | 0x40000000);
	/*table2[j*4+3] = 0;*/
#endif

	/* Number of polls spent in the spin loop above. */
	data[64] = i;

	/* Record the rest of the register history. */
	for (i = 66; i < 1024; ++i) {
		data2[i] = MACH64_READ( MACH64_BM_GUI_TABLE );
		data3[i] = MACH64_READ( MACH64_BM_COMMAND );
	}

	DRM_INFO( "waiting for idle...\n" );
	ret = mach64_do_wait_for_idle( dev_priv );
	if ( ret ) {
		/* engine locked up, dump register state and reset */
		DRM_INFO( "mach64_do_wait_for_idle failed (result=%d)\n", ret);
		mach64_dump_engine_info( dev_priv );

		DRM_INFO( " timeout = 0x%08x\n", data[64]);
		/* Print the two bracketing samples, then only GUI_TABLE changes. */
		for (i = 64; i < 1024; ++i) {
			if( i <= 65 || data2[i] != data2[i-1])
				DRM_INFO( " GUI_TABLE = 0x%08x COMMAND = 0x%08x\n", data2[i], data3[i] );
		}

		DRM_INFO( "resetting engine ...\n");
		mach64_do_engine_reset( dev );
		goto out_free;
	}
	DRM_INFO( "waitini for idle...done\n" );

	mach64_dump_engine_info( dev_priv );

	DRM_INFO( " timeout = 0x%08x\n", data[64]);
	/* Print the two bracketing samples, then every change of either register. */
	for (i = 64; i < 1024; ++i) {
		if( i <= 65 || data2[i] != data2[i-1] || data3[i] != data3[i-1])
			DRM_INFO( " GUI_TABLE = 0x%08x COMMAND = 0x%08x\n", data2[i], data3[i] );
	}

	/* Check register values to see if the GUI master operation succeeded */
	for ( i = 0; i < 3; i++ ) {
		regs[i] = MACH64_READ( (MACH64_VERTEX_1_S + i*4) );
		DRM_INFO( "(After DMA Transfer) reg %d = 0x%08x\n", i, regs[i] );
		DRM_DEBUG( "(After DMA Transfer) reg %d = 0x%08x\n", i, regs[i] );
		if (regs[i] != expected[i]) {
			ret = -1; /* GUI master operation failed */
			break;
		}
	}

out_free:
	/* Single exit: free only what was actually allocated. */
	if ( ret ) {
		DRM_INFO( "freeing data buffer memory.\n" );
	} else {
		DRM_DEBUG( "freeing data buffer memory.\n" );
	}
	if ( cpu_addr_data )
		pci_pool_free( dev_priv->pool, cpu_addr_data, data_handle );
	if ( cpu_addr_data2 )
		pci_pool_free( dev_priv->pool, cpu_addr_data2, data2_handle );
	if ( cpu_addr_data3 )
		pci_pool_free( dev_priv->pool, cpu_addr_data3, data3_handle );
	if ( cpu_addr_table2 )
		pci_free_consistent( NULL, 32*1024, cpu_addr_table2, table2_handle );
	if ( ret ) {
		DRM_INFO( "returning ...\n" );
	} else {
		DRM_DEBUG( "returning ...\n" );
	}
	return ret;
}