On 2002.05.16 21:37 José Fonseca wrote:
> ... I'm gonna devise a test for that: it will monitor the BM_GUI_TABLE 
> value and change the value at the last moment (like waiting for the 
> train to come to cross the line!)
> 
> ...

Here is the commented output of such a test that waits for the card to reach 
the n-2 entry of an n-entry descriptor table before changing it:

   this timeout is the number of loops spent waiting for the card to reach 
the n-2 entry. (n was 129)
        <6>[drm]  timeout = 0x00000085

   this is the contents of the registers. COMMAND was read last so it is 
the value immediately before the change
        <6>[drm]  GUI_TABLE = 0x002983f0   COMMAND = 0x40000000

   *the change to the n entry (0x00298410) happened now*

   this is the contents of the registers. COMMAND was read first so it is 
the value immediately after the change.         <6>[drm]  GUI_TABLE = 
0x00298410   COMMAND = 0xc0000018
   it was too late as COMMAND has the last bit set. as you can see above 
all this wasn't as fast as I initially thought, since the n-1 entry 
(0x00298400) was processed and didn't get recorded.

   this is the rest of the history
        <6>[drm]  GUI_TABLE = 0x00298410   COMMAND = 0xc0000000

   this is the contents of the registers written in the DMA buffers
        <6>[drm] (After DMA Transfer) reg 0 = 0x22223333
        <6>[drm] (After DMA Transfer) reg 1 = 0x22223333
        <6>[drm] (After DMA Transfer) reg 2 = 0x22223333
   the register values tell if the n+1 got processed or not


Now the same test as above but waiting for the n-3 entry:

        <6>[drm]  timeout = 0x00000084

   the n-3 entry (0x000803e0)
        <6>[drm]  GUI_TABLE = 0x000803e0   COMMAND = 0x40000000

   *the change to the n entry (0x00298410) happened now*

   the n-1 entry (0x00080400), again during the change the card advanced 
one entry
        <6>[drm]  GUI_TABLE = 0x00080400   COMMAND = 0x40000018
   this time it was in time. last bit of COMMAND wasn't set!!

   the rest of the history
        <6>[drm]  GUI_TABLE = 0x00080410   COMMAND = 0x40000000
        <6>[drm]  GUI_TABLE = 0x00080420   COMMAND = 0xc0000000
   you can see it advance to the n+1 entry!!

        <6>[drm] (After DMA Transfer) reg 0 = 0x33331111
   bingo: the register values were different



So both tests are successful in showing that there is no buffering of the 
descriptor table and that the END_OF_LIST_STATUS@BM_COMMAND is *almost* 
enough to determine if the change was successful!

The "almost" is because there is one remote but still possible race 
condition that we need to account for. At least in this test (whose code had 
memory references during the change) the card processed a buffer (which 
are only 24 bytes) during the change. This means that with slower 
computers, if we stitch just one more entry onto a very small buffer (as 
was done here) the card may finish processing our new buffer even before we 
read BM_COMMAND. This would make us think that the buffer wasn't 
processed, and we would submit it twice.

To resolve this race condition it's not really necessary to have scratch 
registers with buffer aging. It's enough to wait and see in which 
GUI_TABLE the card stops.

In summary, we can proceed with implementing these ideas, which will allow 
us to have the card running continuously, since there is no indication 
whatsoever that this can fail.

I also restate here that I believe that this ability of the chip was 
surely a design option, otherwise there would be no reason for a circular 
descriptor table, and this table would probably also be cached. I might 
contact ATI to discuss this later on.

Again, this test code is attached in case anyone wants to review it.


Regards,

José Fonseca
/*
 * Bus-master DMA race test for the Mach64 GUI-master engine.
 *
 * Builds a descriptor table (table2) of identical entries pointing at a
 * small register-write buffer (data), waits for the card's BM_GUI_TABLE
 * pointer to reach the next-to-last entry, and then rewrites the last
 * entry in flight to see whether the card picks up the change (i.e.
 * whether the descriptor table is buffered by the chip or re-read from
 * memory).  BM_GUI_TABLE/BM_COMMAND snapshots are recorded into the data2/
 * data3 buffers and dumped afterwards; the final VERTEX_1_{S,T,W} register
 * values show which data buffer the card actually processed last.
 *
 * Returns 0 on success, a negative errno on allocation failure, the
 * mach64_do_wait_for_idle() error code if the engine locks up, or -1 if
 * the GUI-master operation wrote unexpected register values.
 *
 * Fixes vs. the original posting:
 *  - the "wait_for_idle failed" dump ran unconditionally (statements were
 *    misplaced above the if) and the failure path returned the history
 *    loop counter instead of the wait_for_idle result;
 *  - the -ENOMEM path and the register-mismatch path leaked the DMA
 *    allocations; cleanup is now a single goto chain;
 *  - minor log-string typos ("waitini", " EG1 = ") corrected.
 */
static int mach64_bm_dma_test( drm_device_t *dev )
{
	drm_mach64_private_t *dev_priv = dev->dev_private;
	dma_addr_t data_handle, data2_handle, data3_handle, table2_handle;
	void *cpu_addr_data, *cpu_addr_data2, *cpu_addr_data3, *cpu_addr_table2;
	u32 data_addr, data2_addr, data3_addr, table2_addr;
	u32 *table, *data, *table2, *data2, *data3;
	u32 regs[3], expected[3];
	int i, j;
	int ret = 0;

	DRM_DEBUG( "%s\n", __FUNCTION__ );

	table = (u32 *) dev_priv->cpu_addr_table;

	/* FIXME: get a dma buffer from the freelist here rather than using the pool */
	DRM_DEBUG( "Allocating data memory ...\n" );
	cpu_addr_data = pci_pool_alloc( dev_priv->pool, SLAB_ATOMIC, &data_handle );
	cpu_addr_data2 = pci_pool_alloc( dev_priv->pool, SLAB_ATOMIC, &data2_handle );
	cpu_addr_data3 = pci_pool_alloc( dev_priv->pool, SLAB_ATOMIC, &data3_handle );
	cpu_addr_table2 = pci_alloc_consistent( NULL, 32*1024, &table2_handle );
	if (!cpu_addr_data || !data_handle || !cpu_addr_data2 || !data2_handle ||
	    !cpu_addr_data3 || !data3_handle || !cpu_addr_table2 || !table2_handle) {
		DRM_INFO( "data-memory allocation failed!\n" );
		/* BUGFIX: free whichever allocations succeeded instead of
		 * leaking them on partial failure. */
		ret = -ENOMEM;
		goto cleanup;
	}

	data = (u32 *) cpu_addr_data;
	data_addr = (u32) data_handle;
	data2 = (u32 *) cpu_addr_data2;
	data2_addr = (u32) data2_handle;
	data3 = (u32 *) cpu_addr_data3;
	data3_addr = (u32) data3_handle;
	table2 = (u32 *) cpu_addr_table2;
	table2_addr = (u32) table2_handle;

	/* Quiesce the engine and give the probe registers known values. */
	MACH64_WRITE( MACH64_SRC_CNTL, 0x00000000 );

	MACH64_WRITE( MACH64_VERTEX_1_S, 0x00000000 );
	MACH64_WRITE( MACH64_VERTEX_1_T, 0x00000000 );
	MACH64_WRITE( MACH64_VERTEX_1_W, 0x00000000 );

	for (i=0; i < 3; i++) {
		DRM_DEBUG( "(before dma transfer) reg %d = 0x%08x\n", i,
			   MACH64_READ( (MACH64_VERTEX_1_S + i*4) ) );
	}

	/* 1_90 = vertex_1_s, setup 3 sequential reg writes */
	/* use only s,t,w vertex registers so we don't have to mask any results */
	data[0] = cpu_to_le32(0x00020190);
	data[1] = 0x11111111;
	data[2] = 0x11112222;
	data[3] = 0x11113333;
	data[4] = cpu_to_le32(0x0000006d); /* src_cntl */
	data[5] = 0x00000000;

	/* Second buffer: the values we expect if the swapped-in final entry
	 * (pointing at data2) is the last one the card processes. */
	data2[0] = cpu_to_le32(0x00020190);
	data2[1] = expected[0] = 0x22223333;
	data2[2] = expected[1] = 0x22223333;
	data2[3] = expected[2] = 0x22223333;
	data2[4] = cpu_to_le32(0x0000006d); /* src_cntl */
	data2[5] = 0x00000000;

	/* Third buffer: distinct values so a run past the intended end of
	 * the table is visible in the register dump. */
	data3[0] = cpu_to_le32(0x00020190);
	data3[1] = 0x33331111;
	data3[2] = 0x33332222;
	data3[3] = 0x33333333;
	data3[4] = cpu_to_le32(0x0000006d); /* src_cntl */
	data3[5] = 0x00000000;

	/* Fill 64 identical descriptors, then append a marked-last entry for
	 * data2 and a spare entry for data3 (overwritten mid-flight below).
	 * After the loop j == 64, so entries 64 and 65 are written; well
	 * within the 32KB (8192-dword) table allocation. */
	DRM_DEBUG( "preparing table ...\n" );
	for (j = 0; j < 64; ++j) {
		table2[j*4+0] = cpu_to_le32(MACH64_BM_ADDR + APERTURE_OFFSET);
		table2[j*4+1] = cpu_to_le32(data_addr);
		table2[j*4+2] = cpu_to_le32(6 * sizeof( u32 ) | 0x40000000);
		table2[j*4+3] = 0;
	}
	table2[j*4+0] = cpu_to_le32(MACH64_BM_ADDR + APERTURE_OFFSET);
	table2[j*4+1] = cpu_to_le32(data2_addr);
	table2[j*4+2] = cpu_to_le32(6 * sizeof( u32 ) | 0x80000000 | 0x40000000);
	table2[j*4+3] = 0;

	table2[j*4+4] = cpu_to_le32(MACH64_BM_ADDR + APERTURE_OFFSET);
	table2[j*4+5] = cpu_to_le32(data3_addr);
	table2[j*4+6] = cpu_to_le32(6 * sizeof( u32 ) | 0x80000000 | 0x40000000);
	table2[j*4+7] = 0;

	DRM_DEBUG( "table[0] = 0x%08x\n", table[0] );
	DRM_DEBUG( "table[1] = 0x%08x\n", table[1] );
	DRM_DEBUG( "table[2] = 0x%08x\n", table[2] );
	DRM_DEBUG( "table[3] = 0x%08x\n", table[3] );

	for ( i = 0 ; i < 6 ; i++) {
		DRM_DEBUG( " data[%d] = 0x%08x\n", i, data[i] );
	}

	mach64_flush_write_combine();

	DRM_DEBUG( "waiting for idle...\n" );
	if ( ( i = mach64_do_wait_for_idle( dev_priv ) ) ) {
		DRM_INFO( "mach64_do_wait_for_idle failed (result=%d)\n", i);
		DRM_INFO( "resetting engine ...\n");
		mach64_do_engine_reset( dev );
		ret = i;
		goto cleanup;
	}
	DRM_DEBUG( "waiting for idle...done\n" );

	DRM_DEBUG( "BUS_CNTL = 0x%08x\n", MACH64_READ( MACH64_BUS_CNTL ) );
	DRM_DEBUG( "SRC_CNTL = 0x%08x\n", MACH64_READ( MACH64_SRC_CNTL ) );
	DRM_DEBUG( "\n" );
	DRM_DEBUG( "data bus addr = 0x%08x\n", data_addr );
	DRM_DEBUG( "table bus addr = 0x%08x\n", dev_priv->table_addr );

	DRM_INFO( "starting DMA transfer...\n" );
	MACH64_WRITE( MACH64_BM_GUI_TABLE,
			  /*dev_priv->table_addr |*/
			  table2_addr |
			  MACH64_CIRCULAR_BUF_SIZE_16KB );

	MACH64_WRITE( MACH64_SRC_CNTL,
		      MACH64_SRC_BM_ENABLE | MACH64_SRC_BM_SYNC |
		      MACH64_SRC_BM_OP_SYSTEM_TO_REG );

	/* Kick off the transfer */
	DRM_DEBUG( "starting DMA transfer... done.\n" );
	MACH64_WRITE( MACH64_DST_HEIGHT_WIDTH, 0 );

	/* Spin until the card's table pointer reaches the n-2 entry (or a
	 * generous loop bound expires); i counts the polls. */
	i = 0;

	while(MACH64_READ( MACH64_BM_GUI_TABLE ) != ((table2_addr + 4*(j-1)*4) | MACH64_CIRCULAR_BUF_SIZE_16KB ) && i < 0x100000)
		++i;

	/* Snapshot just before the in-flight change; COMMAND read last. */
	data2[64] = MACH64_READ( MACH64_BM_GUI_TABLE );
	data3[64] = MACH64_READ( MACH64_BM_COMMAND );

	/* The race: clear the end-of-list bit of the last descriptor while
	 * the card is (we hope) still short of it. */
	table2[j*4+2] = cpu_to_le32(6 * sizeof( u32 ) | 0x40000000);

	/* Snapshot just after the change; COMMAND read first this time. */
	data3[65] = MACH64_READ( MACH64_BM_COMMAND );
	data2[65] = MACH64_READ( MACH64_BM_GUI_TABLE );

#if 0
	/* Alternative experiments: append another entry / wrap the table. */
	table2[j*4+4] = cpu_to_le32(MACH64_BM_ADDR + APERTURE_OFFSET);
	table2[j*4+5] = cpu_to_le32(data2_addr);
	table2[j*4+6] = cpu_to_le32(6 * sizeof( u32 ) | 0x80000000 | 0x40000000);
	table2[j*4+7] = 0;
	table2[0*4+1] = cpu_to_le32(MACH64_BM_ADDR + APERTURE_OFFSET);
	table2[0*4+2] = cpu_to_le32(data3_addr);
	table2[0*4+3] = cpu_to_le32(6 * sizeof( u32 ) | 0x80000000 | 0x40000000);
	table2[0*4+4] = 0;
        /* Overwrite the last descriptor of table */
	/*table2[j*4+0] = cpu_to_le32(MACH64_BM_ADDR + APERTURE_OFFSET);*/
	/*table2[j*4+1] = cpu_to_le32(data_addr);*/
	table2[j*4+2] = cpu_to_le32(6 * sizeof( u32 ) | 0x40000000);
	/*table2[j*4+3] = 0;*/
#endif

	/* Record the poll count, then keep sampling GUI_TABLE/COMMAND to
	 * build the post-change history.  NOTE(review): indices up to 1023
	 * assume the pool buffers are at least 4KB — confirm pool size. */
	data[64] = i;
	for (i = 66; i < 1024; ++i) {
		data2[i] = MACH64_READ( MACH64_BM_GUI_TABLE );
		data3[i] = MACH64_READ( MACH64_BM_COMMAND );
	}

	DRM_INFO( "waiting for idle...\n" );
	if ( ( i = mach64_do_wait_for_idle( dev_priv ) ) ) {
		/* engine locked up, dump register state and reset.
		 * BUGFIX: these statements were misplaced above the if and
		 * ran unconditionally with a stale i. */
		DRM_INFO( "mach64_do_wait_for_idle failed (result=%d)\n", i);
		mach64_dump_engine_info( dev_priv );
		DRM_INFO( " timeout = 0x%08x\n", data[64]);
		/* BUGFIX: use j for the dump loop so i (the error code to
		 * return) is not clobbered before we return it. */
		for (j = 64; j < 1024; ++j) {
			if( j <= 65 || data2[j] != data2[j-1])
				DRM_INFO( " GUI_TABLE = 0x%08x   COMMAND = 0x%08x\n", data2[j], data3[j] );
		}
		DRM_INFO( "resetting engine ...\n");
		mach64_do_engine_reset( dev );
		ret = i;
		goto cleanup;
	}
	DRM_INFO( "waiting for idle...done\n" );

	/* Success: dump engine state and the condensed GUI_TABLE/COMMAND
	 * history (only lines where either value changed). */
	mach64_dump_engine_info( dev_priv );
	DRM_INFO( " timeout = 0x%08x\n", data[64]);
	for (i = 64; i < 1024; ++i) {
		if( i <= 65 || data2[i] != data2[i-1] || data3[i] != data3[i-1])
			DRM_INFO( " GUI_TABLE = 0x%08x   COMMAND = 0x%08x\n", data2[i], data3[i] );
	}

	/* Check register values to see if the GUI master operation succeeded */
	for ( i = 0; i < 3; i++ ) {
		regs[i] = MACH64_READ( (MACH64_VERTEX_1_S + i*4) );
		DRM_INFO( "(After DMA Transfer) reg %d = 0x%08x\n", i, regs[i] );
		if (regs[i] != expected[i]) {
			/* GUI master operation failed.
			 * BUGFIX: fall through to cleanup instead of
			 * returning directly and leaking the buffers. */
			ret = -1;
			goto cleanup;
		}
	}

cleanup:
	DRM_DEBUG( "freeing data buffer memory.\n" );
	if (cpu_addr_data)
		pci_pool_free( dev_priv->pool, cpu_addr_data, data_handle );
	if (cpu_addr_data2)
		pci_pool_free( dev_priv->pool, cpu_addr_data2, data2_handle );
	if (cpu_addr_data3)
		pci_pool_free( dev_priv->pool, cpu_addr_data3, data3_handle );
	if (cpu_addr_table2)
		pci_free_consistent( NULL, 32*1024, cpu_addr_table2, table2_handle );
	DRM_DEBUG( "returning ...\n" );

	return ret;
}

Reply via email to