On Thu, Feb 18, 2010 at 1:45 PM, Alex Deucher <[email protected]> wrote: > From 8f47a2a76ceb0638cfd3b053e7cb2bf3dc1f8b4a Mon Sep 17 00:00:00 2001 > From: Alex Deucher <[email protected]> > Date: Thu, 18 Feb 2010 13:39:36 -0500 > Subject: [PATCH] drm/radeon/r7xx: fixes to gfx init > > - RV740 requires a special backend map > - updated swizzle modes for backend map setup > - fix programming of a few gfx regs > > This fixes occlusion queries and rendering errors on RV740.
Hold off on this patch for now. I'm discussing a better fix internally. Alex > > Signed-off-by: Alex Deucher <[email protected]> > --- > drivers/gpu/drm/radeon/r600_cp.c | 185 +++++++++++++++++++++++++++---------- > drivers/gpu/drm/radeon/rv770.c | 189 > +++++++++++++++++++++++++++----------- > 2 files changed, 272 insertions(+), 102 deletions(-) > > diff --git a/drivers/gpu/drm/radeon/r600_cp.c > b/drivers/gpu/drm/radeon/r600_cp.c > index d9712a1..b90d9e6 100644 > --- a/drivers/gpu/drm/radeon/r600_cp.c > +++ b/drivers/gpu/drm/radeon/r600_cp.c > @@ -1162,7 +1162,8 @@ static void r600_gfx_init(struct drm_device *dev, > > } > > -static u32 r700_get_tile_pipe_to_backend_map(u32 num_tile_pipes, > +static u32 r700_get_tile_pipe_to_backend_map(drm_radeon_private_t *dev_priv, > + u32 num_tile_pipes, > u32 num_backends, > u32 backend_disable_mask) > { > @@ -1173,6 +1174,7 @@ static u32 r700_get_tile_pipe_to_backend_map(u32 > num_tile_pipes, > u32 swizzle_pipe[R7XX_MAX_PIPES]; > u32 cur_backend; > u32 i; > + bool force_no_swizzle; > > if (num_tile_pipes > R7XX_MAX_PIPES) > num_tile_pipes = R7XX_MAX_PIPES; > @@ -1202,6 +1204,18 @@ static u32 > r700_get_tile_pipe_to_backend_map(u32 num_tile_pipes, > if (enabled_backends_count != num_backends) > num_backends = enabled_backends_count; > > + switch (dev_priv->flags & RADEON_FAMILY_MASK) { > + case CHIP_RV770: > + case CHIP_RV730: > + force_no_swizzle = false; > + break; > + case CHIP_RV710: > + case CHIP_RV740: > + default: > + force_no_swizzle = true; > + break; > + } > + > memset((uint8_t *)&swizzle_pipe[0], 0, sizeof(u32) * R7XX_MAX_PIPES); > switch (num_tile_pipes) { > case 1: > @@ -1212,49 +1226,100 @@ static u32 > r700_get_tile_pipe_to_backend_map(u32 num_tile_pipes, > swizzle_pipe[1] = 1; > break; > case 3: > - swizzle_pipe[0] = 0; > - swizzle_pipe[1] = 2; > - swizzle_pipe[2] = 1; > + if (force_no_swizzle) { > + swizzle_pipe[0] = 0; > + swizzle_pipe[1] = 1; > + swizzle_pipe[2] = 2; > + } else { > + swizzle_pipe[0] = 0; > + 
swizzle_pipe[1] = 2; > + swizzle_pipe[2] = 1; > + } > break; > case 4: > - swizzle_pipe[0] = 0; > - swizzle_pipe[1] = 2; > - swizzle_pipe[2] = 3; > - swizzle_pipe[3] = 1; > + if (force_no_swizzle) { > + swizzle_pipe[0] = 0; > + swizzle_pipe[1] = 1; > + swizzle_pipe[2] = 2; > + swizzle_pipe[3] = 3; > + } else { > + swizzle_pipe[0] = 0; > + swizzle_pipe[1] = 2; > + swizzle_pipe[2] = 3; > + swizzle_pipe[3] = 1; > + } > break; > case 5: > - swizzle_pipe[0] = 0; > - swizzle_pipe[1] = 2; > - swizzle_pipe[2] = 4; > - swizzle_pipe[3] = 1; > - swizzle_pipe[4] = 3; > + if (force_no_swizzle) { > + swizzle_pipe[0] = 0; > + swizzle_pipe[1] = 1; > + swizzle_pipe[2] = 2; > + swizzle_pipe[3] = 3; > + swizzle_pipe[4] = 4; > + } else { > + swizzle_pipe[0] = 0; > + swizzle_pipe[1] = 2; > + swizzle_pipe[2] = 4; > + swizzle_pipe[3] = 1; > + swizzle_pipe[4] = 3; > + } > break; > case 6: > - swizzle_pipe[0] = 0; > - swizzle_pipe[1] = 2; > - swizzle_pipe[2] = 4; > - swizzle_pipe[3] = 5; > - swizzle_pipe[4] = 3; > - swizzle_pipe[5] = 1; > + if (force_no_swizzle) { > + swizzle_pipe[0] = 0; > + swizzle_pipe[1] = 1; > + swizzle_pipe[2] = 2; > + swizzle_pipe[3] = 3; > + swizzle_pipe[4] = 4; > + swizzle_pipe[5] = 5; > + } else { > + swizzle_pipe[0] = 0; > + swizzle_pipe[1] = 2; > + swizzle_pipe[2] = 4; > + swizzle_pipe[3] = 5; > + swizzle_pipe[4] = 3; > + swizzle_pipe[5] = 1; > + } > break; > case 7: > - swizzle_pipe[0] = 0; > - swizzle_pipe[1] = 2; > - swizzle_pipe[2] = 4; > - swizzle_pipe[3] = 6; > - swizzle_pipe[4] = 3; > - swizzle_pipe[5] = 1; > - swizzle_pipe[6] = 5; > + if (force_no_swizzle) { > + swizzle_pipe[0] = 0; > + swizzle_pipe[1] = 1; > + swizzle_pipe[2] = 2; > + swizzle_pipe[3] = 3; > + swizzle_pipe[4] = 4; > + swizzle_pipe[5] = 5; > + swizzle_pipe[6] = 6; > + } else { > + swizzle_pipe[0] = 0; > + swizzle_pipe[1] = 2; > + swizzle_pipe[2] = 4; > + swizzle_pipe[3] = 6; > + swizzle_pipe[4] = 3; > + swizzle_pipe[5] = 1; > + swizzle_pipe[6] = 5; > + } > break; > case 8: > - 
swizzle_pipe[0] = 0; > - swizzle_pipe[1] = 2; > - swizzle_pipe[2] = 4; > - swizzle_pipe[3] = 6; > - swizzle_pipe[4] = 3; > - swizzle_pipe[5] = 1; > - swizzle_pipe[6] = 7; > - swizzle_pipe[7] = 5; > + if (force_no_swizzle) { > + swizzle_pipe[0] = 0; > + swizzle_pipe[1] = 1; > + swizzle_pipe[2] = 2; > + swizzle_pipe[3] = 3; > + swizzle_pipe[4] = 4; > + swizzle_pipe[5] = 5; > + swizzle_pipe[6] = 6; > + swizzle_pipe[7] = 7; > + } else { > + swizzle_pipe[0] = 0; > + swizzle_pipe[1] = 2; > + swizzle_pipe[2] = 4; > + swizzle_pipe[3] = 6; > + swizzle_pipe[4] = 3; > + swizzle_pipe[5] = 1; > + swizzle_pipe[6] = 7; > + swizzle_pipe[7] = 5; > + } > break; > } > > @@ -1275,8 +1340,10 @@ static void r700_gfx_init(struct drm_device *dev, > drm_radeon_private_t *dev_priv) > { > int i, j, num_qd_pipes; > + u32 ta_aux_cntl; > u32 sx_debug_1; > u32 smx_dc_ctl0; > + u32 db_debug3; > u32 num_gs_verts_per_thread; > u32 vgt_gs_per_es; > u32 gs_prim_buffer_depth = 0; > @@ -1439,9 +1506,15 @@ static void r700_gfx_init(struct drm_device *dev, > > gb_tiling_config |= R600_BANK_SWAPS(1); > > - backend_map = > r700_get_tile_pipe_to_backend_map(dev_priv->r600_max_tile_pipes, > - > dev_priv->r600_max_backends, > - (0xff << > dev_priv->r600_max_backends) & 0xff); > + /* RV740 has a special backend map */ > + if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV740) > + backend_map = 0x28; > + else > + backend_map = r700_get_tile_pipe_to_backend_map(dev_priv, > + > dev_priv->r600_max_tile_pipes, > + > dev_priv->r600_max_backends, > + > ((R7XX_MAX_BACKENDS_MASK << dev_priv->r600_max_backends) > + & > R7XX_MAX_BACKENDS_MASK)); > gb_tiling_config |= R600_BACKEND_MAP(backend_map); > > cc_gc_shader_pipe_config = > @@ -1488,10 +1561,8 @@ static void r700_gfx_init(struct drm_device *dev, > > RADEON_WRITE(R600_CP_MEQ_THRESHOLDS, R700_STQ_SPLIT(0x30)); > > - RADEON_WRITE(R600_TA_CNTL_AUX, (R600_DISABLE_CUBE_ANISO | > - R600_SYNC_GRADIENT | > - R600_SYNC_WALKER | > - R600_SYNC_ALIGNER)); > + ta_aux_cntl = 
RADEON_READ(R600_TA_CNTL_AUX); > + RADEON_WRITE(R600_TA_CNTL_AUX, ta_aux_cntl | R600_DISABLE_CUBE_ANISO); > > sx_debug_1 = RADEON_READ(R700_SX_DEBUG_1); > sx_debug_1 |= R700_ENABLE_NEW_SMX_ADDRESS; > @@ -1502,14 +1573,28 @@ static void r700_gfx_init(struct drm_device *dev, > smx_dc_ctl0 |= R700_CACHE_DEPTH((dev_priv->r700_sx_num_of_sets * 64) - > 1); > RADEON_WRITE(R600_SMX_DC_CTL0, smx_dc_ctl0); > > - RADEON_WRITE(R700_SMX_EVENT_CTL, (R700_ES_FLUSH_CTL(4) | > - R700_GS_FLUSH_CTL(4) | > - R700_ACK_FLUSH_CTL(3) | > - R700_SYNC_FLUSH_CTL)); > + if ((dev_priv->flags & RADEON_FAMILY_MASK) != CHIP_RV740) > + RADEON_WRITE(R700_SMX_EVENT_CTL, (R700_ES_FLUSH_CTL(4) | > + R700_GS_FLUSH_CTL(4) | > + R700_ACK_FLUSH_CTL(3) | > + R700_SYNC_FLUSH_CTL)); > > - if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV770) > - RADEON_WRITE(R700_DB_DEBUG3, R700_DB_CLK_OFF_DELAY(0x1f)); > - else { > + db_debug3 = RADEON_READ(R700_DB_DEBUG3); > + db_debug3 &= ~R700_DB_CLK_OFF_DELAY(0x1f); > + switch (dev_priv->flags & RADEON_FAMILY_MASK) { > + case CHIP_RV770: > + case CHIP_RV740: > + db_debug3 |= R700_DB_CLK_OFF_DELAY(0x1f); > + break; > + case CHIP_RV710: > + case CHIP_RV730: > + default: > + db_debug3 |= R700_DB_CLK_OFF_DELAY(2); > + break; > + } > + RADEON_WRITE(R700_DB_DEBUG3, db_debug3); > + > + if ((dev_priv->flags & RADEON_FAMILY_MASK) != CHIP_RV770) { > db_debug4 = RADEON_READ(RV700_DB_DEBUG4); > db_debug4 |= RV700_DISABLE_TILE_COVERED_FOR_PS_ITER; > RADEON_WRITE(RV700_DB_DEBUG4, db_debug4); > @@ -1538,10 +1623,10 @@ static void r700_gfx_init(struct drm_device *dev, > R600_ALU_UPDATE_FIFO_HIWATER(0x8)); > switch (dev_priv->flags & RADEON_FAMILY_MASK) { > case CHIP_RV770: > - sq_ms_fifo_sizes |= R600_FETCH_FIFO_HIWATER(0x1); > - break; > case CHIP_RV730: > case CHIP_RV710: > + sq_ms_fifo_sizes |= R600_FETCH_FIFO_HIWATER(0x1); > + break; > case CHIP_RV740: > default: > sq_ms_fifo_sizes |= R600_FETCH_FIFO_HIWATER(0x4); > diff --git a/drivers/gpu/drm/radeon/rv770.c 
b/drivers/gpu/drm/radeon/rv770.c > index fbec052..3fce1e7 100644 > --- a/drivers/gpu/drm/radeon/rv770.c > +++ b/drivers/gpu/drm/radeon/rv770.c > @@ -274,9 +274,10 @@ static int rv770_cp_load_microcode(struct > radeon_device *rdev) > /* > * Core functions > */ > -static u32 r700_get_tile_pipe_to_backend_map(u32 num_tile_pipes, > - u32 num_backends, > - u32 backend_disable_mask) > +static u32 r700_get_tile_pipe_to_backend_map(struct radeon_device *rdev, > + u32 num_tile_pipes, > + u32 num_backends, > + u32 backend_disable_mask) > { > u32 backend_map = 0; > u32 enabled_backends_mask; > @@ -285,6 +286,7 @@ static u32 r700_get_tile_pipe_to_backend_map(u32 > num_tile_pipes, > u32 swizzle_pipe[R7XX_MAX_PIPES]; > u32 cur_backend; > u32 i; > + bool force_no_swizzle; > > if (num_tile_pipes > R7XX_MAX_PIPES) > num_tile_pipes = R7XX_MAX_PIPES; > @@ -314,6 +316,18 @@ static u32 r700_get_tile_pipe_to_backend_map(u32 > num_tile_pipes, > if (enabled_backends_count != num_backends) > num_backends = enabled_backends_count; > > + switch (rdev->family) { > + case CHIP_RV770: > + case CHIP_RV730: > + force_no_swizzle = false; > + break; > + case CHIP_RV710: > + case CHIP_RV740: > + default: > + force_no_swizzle = true; > + break; > + } > + > memset((uint8_t *)&swizzle_pipe[0], 0, sizeof(u32) * R7XX_MAX_PIPES); > switch (num_tile_pipes) { > case 1: > @@ -324,49 +338,100 @@ static u32 > r700_get_tile_pipe_to_backend_map(u32 num_tile_pipes, > swizzle_pipe[1] = 1; > break; > case 3: > - swizzle_pipe[0] = 0; > - swizzle_pipe[1] = 2; > - swizzle_pipe[2] = 1; > + if (force_no_swizzle) { > + swizzle_pipe[0] = 0; > + swizzle_pipe[1] = 1; > + swizzle_pipe[2] = 2; > + } else { > + swizzle_pipe[0] = 0; > + swizzle_pipe[1] = 2; > + swizzle_pipe[2] = 1; > + } > break; > case 4: > - swizzle_pipe[0] = 0; > - swizzle_pipe[1] = 2; > - swizzle_pipe[2] = 3; > - swizzle_pipe[3] = 1; > + if (force_no_swizzle) { > + swizzle_pipe[0] = 0; > + swizzle_pipe[1] = 1; > + swizzle_pipe[2] = 2; > + swizzle_pipe[3] = 
3; > + } else { > + swizzle_pipe[0] = 0; > + swizzle_pipe[1] = 2; > + swizzle_pipe[2] = 3; > + swizzle_pipe[3] = 1; > + } > break; > case 5: > - swizzle_pipe[0] = 0; > - swizzle_pipe[1] = 2; > - swizzle_pipe[2] = 4; > - swizzle_pipe[3] = 1; > - swizzle_pipe[4] = 3; > + if (force_no_swizzle) { > + swizzle_pipe[0] = 0; > + swizzle_pipe[1] = 1; > + swizzle_pipe[2] = 2; > + swizzle_pipe[3] = 3; > + swizzle_pipe[4] = 4; > + } else { > + swizzle_pipe[0] = 0; > + swizzle_pipe[1] = 2; > + swizzle_pipe[2] = 4; > + swizzle_pipe[3] = 1; > + swizzle_pipe[4] = 3; > + } > break; > case 6: > - swizzle_pipe[0] = 0; > - swizzle_pipe[1] = 2; > - swizzle_pipe[2] = 4; > - swizzle_pipe[3] = 5; > - swizzle_pipe[4] = 3; > - swizzle_pipe[5] = 1; > + if (force_no_swizzle) { > + swizzle_pipe[0] = 0; > + swizzle_pipe[1] = 1; > + swizzle_pipe[2] = 2; > + swizzle_pipe[3] = 3; > + swizzle_pipe[4] = 4; > + swizzle_pipe[5] = 5; > + } else { > + swizzle_pipe[0] = 0; > + swizzle_pipe[1] = 2; > + swizzle_pipe[2] = 4; > + swizzle_pipe[3] = 5; > + swizzle_pipe[4] = 3; > + swizzle_pipe[5] = 1; > + } > break; > case 7: > - swizzle_pipe[0] = 0; > - swizzle_pipe[1] = 2; > - swizzle_pipe[2] = 4; > - swizzle_pipe[3] = 6; > - swizzle_pipe[4] = 3; > - swizzle_pipe[5] = 1; > - swizzle_pipe[6] = 5; > + if (force_no_swizzle) { > + swizzle_pipe[0] = 0; > + swizzle_pipe[1] = 1; > + swizzle_pipe[2] = 2; > + swizzle_pipe[3] = 3; > + swizzle_pipe[4] = 4; > + swizzle_pipe[5] = 5; > + swizzle_pipe[6] = 6; > + } else { > + swizzle_pipe[0] = 0; > + swizzle_pipe[1] = 2; > + swizzle_pipe[2] = 4; > + swizzle_pipe[3] = 6; > + swizzle_pipe[4] = 3; > + swizzle_pipe[5] = 1; > + swizzle_pipe[6] = 5; > + } > break; > case 8: > - swizzle_pipe[0] = 0; > - swizzle_pipe[1] = 2; > - swizzle_pipe[2] = 4; > - swizzle_pipe[3] = 6; > - swizzle_pipe[4] = 3; > - swizzle_pipe[5] = 1; > - swizzle_pipe[6] = 7; > - swizzle_pipe[7] = 5; > + if (force_no_swizzle) { > + swizzle_pipe[0] = 0; > + swizzle_pipe[1] = 1; > + swizzle_pipe[2] = 2; > + 
swizzle_pipe[3] = 3; > + swizzle_pipe[4] = 4; > + swizzle_pipe[5] = 5; > + swizzle_pipe[6] = 6; > + swizzle_pipe[7] = 7; > + } else { > + swizzle_pipe[0] = 0; > + swizzle_pipe[1] = 2; > + swizzle_pipe[2] = 4; > + swizzle_pipe[3] = 6; > + swizzle_pipe[4] = 3; > + swizzle_pipe[5] = 1; > + swizzle_pipe[6] = 7; > + swizzle_pipe[7] = 5; > + } > break; > } > > @@ -386,8 +451,10 @@ static u32 r700_get_tile_pipe_to_backend_map(u32 > num_tile_pipes, > static void rv770_gpu_init(struct radeon_device *rdev) > { > int i, j, num_qd_pipes; > + u32 ta_aux_cntl; > u32 sx_debug_1; > u32 smx_dc_ctl0; > + u32 db_debug3; > u32 num_gs_verts_per_thread; > u32 vgt_gs_per_es; > u32 gs_prim_buffer_depth = 0; > @@ -556,9 +623,15 @@ static void rv770_gpu_init(struct radeon_device *rdev) > > gb_tiling_config |= BANK_SWAPS(1); > > - backend_map = > r700_get_tile_pipe_to_backend_map(rdev->config.rv770.max_tile_pipes, > - > rdev->config.rv770.max_backends, > - (0xff << > rdev->config.rv770.max_backends) & 0xff); > + /* RV740 has a special backend map */ > + if (rdev->family == CHIP_RV740) > + backend_map = 0x28; > + else > + backend_map = r700_get_tile_pipe_to_backend_map(rdev, > + > rdev->config.rv770.max_tile_pipes, > + > rdev->config.rv770.max_backends, > + > ((R7XX_MAX_BACKENDS_MASK << rdev->config.rv770.max_backends) > + & > R7XX_MAX_BACKENDS_MASK)); > gb_tiling_config |= BACKEND_MAP(backend_map); > > cc_gc_shader_pipe_config = > @@ -594,10 +667,8 @@ static void rv770_gpu_init(struct radeon_device *rdev) > > WREG32(CP_MEQ_THRESHOLDS, STQ_SPLIT(0x30)); > > - WREG32(TA_CNTL_AUX, (DISABLE_CUBE_ANISO | > - SYNC_GRADIENT | > - SYNC_WALKER | > - SYNC_ALIGNER)); > + ta_aux_cntl = RREG32(TA_CNTL_AUX); > + WREG32(TA_CNTL_AUX, ta_aux_cntl | DISABLE_CUBE_ANISO); > > sx_debug_1 = RREG32(SX_DEBUG_1); > sx_debug_1 |= ENABLE_NEW_SMX_ADDRESS; > @@ -608,14 +679,28 @@ static void rv770_gpu_init(struct radeon_device *rdev) > smx_dc_ctl0 |= CACHE_DEPTH((rdev->config.rv770.sx_num_of_sets * 64) - > 1); > 
WREG32(SMX_DC_CTL0, smx_dc_ctl0); > > - WREG32(SMX_EVENT_CTL, (ES_FLUSH_CTL(4) | > - GS_FLUSH_CTL(4) | > - ACK_FLUSH_CTL(3) | > - SYNC_FLUSH_CTL)); > + if (rdev->family != CHIP_RV740) > + WREG32(SMX_EVENT_CTL, (ES_FLUSH_CTL(4) | > + GS_FLUSH_CTL(4) | > + ACK_FLUSH_CTL(3) | > + SYNC_FLUSH_CTL)); > > - if (rdev->family == CHIP_RV770) > - WREG32(DB_DEBUG3, DB_CLK_OFF_DELAY(0x1f)); > - else { > + db_debug3 = RREG32(DB_DEBUG3); > + db_debug3 &= ~DB_CLK_OFF_DELAY(0x1f); > + switch (rdev->family) { > + case CHIP_RV770: > + case CHIP_RV740: > + db_debug3 |= DB_CLK_OFF_DELAY(0x1f); > + break; > + case CHIP_RV710: > + case CHIP_RV730: > + default: > + db_debug3 |= DB_CLK_OFF_DELAY(2); > + break; > + } > + WREG32(DB_DEBUG3, db_debug3); > + > + if (rdev->family != CHIP_RV770) { > db_debug4 = RREG32(DB_DEBUG4); > db_debug4 |= DISABLE_TILE_COVERED_FOR_PS_ITER; > WREG32(DB_DEBUG4, db_debug4); > @@ -644,10 +729,10 @@ static void rv770_gpu_init(struct radeon_device *rdev) > ALU_UPDATE_FIFO_HIWATER(0x8)); > switch (rdev->family) { > case CHIP_RV770: > - sq_ms_fifo_sizes |= FETCH_FIFO_HIWATER(0x1); > - break; > case CHIP_RV730: > case CHIP_RV710: > + sq_ms_fifo_sizes |= FETCH_FIFO_HIWATER(0x1); > + break; > case CHIP_RV740: > default: > sq_ms_fifo_sizes |= FETCH_FIFO_HIWATER(0x4); > -- > 1.5.6.3 > ------------------------------------------------------------------------------ Download Intel® Parallel Studio Eval Try the new software tools for yourself. Speed compiling, find bugs proactively, and fine-tune applications for parallel performance. See why Intel Parallel Studio got high marks during beta. http://p.sf.net/sfu/intel-sw-dev -- _______________________________________________ Dri-devel mailing list [email protected] https://lists.sourceforge.net/lists/listinfo/dri-devel
