Reviewed-by: Samuel Pitoiset <[email protected]>

On 10/29/18 10:39 PM, Marek Olšák wrote:
From: Marek Olšák <[email protected]>

v2: fix enabling primitive binning
---
  src/amd/addrlib/amdgpu_asic_addr.h              |  2 ++
  src/amd/addrlib/gfx9/gfx9addrlib.cpp            |  2 +-
  src/amd/common/ac_gpu_info.c                    |  6 ++++++
  src/amd/common/ac_llvm_util.c                   |  2 ++
  src/amd/common/ac_surface.c                     |  4 ++++
  src/amd/common/amd_family.h                     |  1 +
  src/amd/common/gfx9d.h                          |  3 +++
  src/gallium/drivers/radeonsi/si_pipe.c          | 10 ++++++----
  src/gallium/drivers/radeonsi/si_state.c         |  4 +++-
  src/gallium/drivers/radeonsi/si_state_binning.c |  1 +
  10 files changed, 29 insertions(+), 6 deletions(-)

diff --git a/src/amd/addrlib/amdgpu_asic_addr.h b/src/amd/addrlib/amdgpu_asic_addr.h
index e5838d42a3c..7436c5493e1 100644
--- a/src/amd/addrlib/amdgpu_asic_addr.h
+++ b/src/amd/addrlib/amdgpu_asic_addr.h
@@ -83,20 +83,21 @@
#define AMDGPU_CARRIZO_RANGE 0x01, 0x21
  #define AMDGPU_BRISTOL_RANGE    0x10, 0x21
  #define AMDGPU_STONEY_RANGE     0x61, 0xFF
#define AMDGPU_VEGA10_RANGE 0x01, 0x14
  #define AMDGPU_VEGA12_RANGE     0x14, 0x28
  #define AMDGPU_VEGA20_RANGE     0x28, 0xFF
#define AMDGPU_RAVEN_RANGE 0x01, 0x81
+#define AMDGPU_RAVEN2_RANGE     0x81, 0xFF
#define AMDGPU_EXPAND_FIX(x) x
  #define AMDGPU_RANGE_HELPER(val, min, max) ((val >= min) && (val < max))
  #define AMDGPU_IN_RANGE(val, ...)   AMDGPU_EXPAND_FIX(AMDGPU_RANGE_HELPER(val, __VA_ARGS__))
// ASICREV_IS(eRevisionId, revisionName)
  #define ASICREV_IS(r, rn)              AMDGPU_IN_RANGE(r, AMDGPU_##rn##_RANGE)
  #define ASICREV_IS_TAHITI_P(r)         ASICREV_IS(r, TAHITI)
  #define ASICREV_IS_PITCAIRN_PM(r)      ASICREV_IS(r, PITCAIRN)
@@ -125,12 +126,13 @@
  #define ASICREV_IS_CARRIZO_BRISTOL(r)  ASICREV_IS(r, BRISTOL)
  #define ASICREV_IS_STONEY(r)           ASICREV_IS(r, STONEY)
#define ASICREV_IS_VEGA10_M(r) ASICREV_IS(r, VEGA10)
  #define ASICREV_IS_VEGA10_P(r)         ASICREV_IS(r, VEGA10)
  #define ASICREV_IS_VEGA12_P(r)         ASICREV_IS(r, VEGA12)
  #define ASICREV_IS_VEGA12_p(r)         ASICREV_IS(r, VEGA12)
  #define ASICREV_IS_VEGA20_P(r)         ASICREV_IS(r, VEGA20)
#define ASICREV_IS_RAVEN(r) ASICREV_IS(r, RAVEN)
+#define ASICREV_IS_RAVEN2(r)           ASICREV_IS(r, RAVEN2)
#endif // _AMDGPU_ASIC_ADDR_H
diff --git a/src/amd/addrlib/gfx9/gfx9addrlib.cpp b/src/amd/addrlib/gfx9/gfx9addrlib.cpp
index d27aabbb60c..f115242c89c 100644
--- a/src/amd/addrlib/gfx9/gfx9addrlib.cpp
+++ b/src/amd/addrlib/gfx9/gfx9addrlib.cpp
@@ -1284,21 +1284,21 @@ ChipFamily Gfx9Lib::HwlConvertChipFamily(
                  m_settings.htileAlignFix = 1;
                  m_settings.applyAliasFix = 1;
              }
m_settings.metaBaseAlignFix = 1; m_settings.depthPipeXorDisable = 1;
              break;
          case FAMILY_RV:
              m_settings.isArcticIsland = 1;
-            m_settings.isRaven        = ASICREV_IS_RAVEN(uChipRevision);
+            m_settings.isRaven        = ASICREV_IS_RAVEN(uChipRevision) || ASICREV_IS_RAVEN2(uChipRevision);
if (m_settings.isRaven)
              {
                  m_settings.isDcn1   = 1;
              }
m_settings.metaBaseAlignFix = 1; if (ASICREV_IS_RAVEN(uChipRevision))
              {
diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c
index 2c70fb2c721..689f544c18b 100644
--- a/src/amd/common/ac_gpu_info.c
+++ b/src/amd/common/ac_gpu_info.c
@@ -307,20 +307,26 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
                info->name = #cfamily; \
                break;
  #include "pci_ids/radeonsi_pci_ids.h"
  #undef CHIPSET
default:
                fprintf(stderr, "amdgpu: Invalid PCI ID.\n");
                return false;
        }
+       /* Raven2 uses the same PCI IDs as Raven1, but different revision IDs. */
+       if (info->family == CHIP_RAVEN && amdinfo->chip_rev >= 0x8) {
+               info->family = CHIP_RAVEN2;
+               info->name = "RAVEN2";
+       }
+
        if (info->family >= CHIP_VEGA10)
                info->chip_class = GFX9;
        else if (info->family >= CHIP_TONGA)
                info->chip_class = VI;
        else if (info->family >= CHIP_BONAIRE)
                info->chip_class = CIK;
        else if (info->family >= CHIP_TAHITI)
                info->chip_class = SI;
        else {
                fprintf(stderr, "amdgpu: Unknown family.\n");
diff --git a/src/amd/common/ac_llvm_util.c b/src/amd/common/ac_llvm_util.c
index cd3525187a0..69d9f7b9f3f 100644
--- a/src/amd/common/ac_llvm_util.c
+++ b/src/amd/common/ac_llvm_util.c
@@ -128,20 +128,22 @@ const char *ac_get_llvm_processor_name(enum radeon_family family)
        case CHIP_VEGAM:
                return "polaris11";
        case CHIP_VEGA10:
                return "gfx900";
        case CHIP_RAVEN:
                return "gfx902";
        case CHIP_VEGA12:
                return HAVE_LLVM >= 0x0700 ? "gfx904" : "gfx902";
        case CHIP_VEGA20:
                return HAVE_LLVM >= 0x0700 ? "gfx906" : "gfx902";
+       case CHIP_RAVEN2:
+               return "gfx902"; /* TODO: use gfx909 when it's available */
        default:
                return "";
        }
  }
static LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family,
                                                     enum 
ac_target_machine_options tm_options,
                                                     LLVMCodeGenOptLevel level,
                                                     const char **out_triple)
  {
diff --git a/src/amd/common/ac_surface.c b/src/amd/common/ac_surface.c
index 94723dc9c09..1f7e2344625 100644
--- a/src/amd/common/ac_surface.c
+++ b/src/amd/common/ac_surface.c
@@ -144,20 +144,24 @@ static void addrlib_family_rev_id(enum radeon_family 
family,
                *addrlib_revid = get_first(AMDGPU_VEGA12_RANGE);
                break;
        case CHIP_VEGA20:
                *addrlib_family = FAMILY_AI;
                *addrlib_revid = get_first(AMDGPU_VEGA20_RANGE);
                break;
        case CHIP_RAVEN:
                *addrlib_family = FAMILY_RV;
                *addrlib_revid = get_first(AMDGPU_RAVEN_RANGE);
                break;
+       case CHIP_RAVEN2:
+               *addrlib_family = FAMILY_RV;
+               *addrlib_revid = get_first(AMDGPU_RAVEN2_RANGE);
+               break;
        default:
                fprintf(stderr, "amdgpu: Unknown family.\n");
        }
  }
static void *ADDR_API allocSysMem(const ADDR_ALLOCSYSMEM_INPUT * pInput)
  {
        return malloc(pInput->sizeInBytes);
  }
diff --git a/src/amd/common/amd_family.h b/src/amd/common/amd_family.h
index a282898be06..185ba029763 100644
--- a/src/amd/common/amd_family.h
+++ b/src/amd/common/amd_family.h
@@ -90,20 +90,21 @@ enum radeon_family {
      CHIP_FIJI,
      CHIP_STONEY,
      CHIP_POLARIS10,
      CHIP_POLARIS11,
      CHIP_POLARIS12,
      CHIP_VEGAM,
      CHIP_VEGA10,
      CHIP_VEGA12,
      CHIP_VEGA20,
      CHIP_RAVEN,
+    CHIP_RAVEN2,
      CHIP_LAST,
  };
enum chip_class {
      CLASS_UNKNOWN = 0,
      R300,
      R400,
      R500,
      R600,
      R700,
diff --git a/src/amd/common/gfx9d.h b/src/amd/common/gfx9d.h
index d18e6655d33..2e790c54699 100644
--- a/src/amd/common/gfx9d.h
+++ b/src/amd/common/gfx9d.h
@@ -4450,20 +4450,23 @@
  #define R_028424_CB_DCC_CONTROL                                         
0x028424
  #define   S_028424_OVERWRITE_COMBINER_DISABLE(x)                      (((unsigned)(x) 
& 0x1) << 0)
  #define   G_028424_OVERWRITE_COMBINER_DISABLE(x)                      (((x) >> 0) 
& 0x1)
  #define   C_028424_OVERWRITE_COMBINER_DISABLE                         
0xFFFFFFFE
  #define   S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(x)          (((unsigned)(x) 
& 0x1) << 1)
  #define   G_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(x)          (((x) >> 1) 
& 0x1)
  #define   C_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE             
0xFFFFFFFD
  #define   S_028424_OVERWRITE_COMBINER_WATERMARK(x)                    (((unsigned)(x) 
& 0x1F) << 2)
  #define   G_028424_OVERWRITE_COMBINER_WATERMARK(x)                    (((x) >> 2) 
& 0x1F)
  #define   C_028424_OVERWRITE_COMBINER_WATERMARK                       
0xFFFFFF83
+#define   S_028424_DISABLE_CONSTANT_ENCODE_REG(x)                     (((unsigned)(x) & 0x1) << 10) /* Raven2+ */
+#define   G_028424_DISABLE_CONSTANT_ENCODE_REG(x)                     (((x) >> 10) & 0x1)
+#define   C_028424_DISABLE_CONSTANT_ENCODE_REG                        0xFFFFFBFF
  #define R_02842C_DB_STENCIL_CONTROL                                     
0x02842C
  #define   S_02842C_STENCILFAIL(x)                                     (((unsigned)(x) 
& 0x0F) << 0)
  #define   G_02842C_STENCILFAIL(x)                                     (((x) >> 0) 
& 0x0F)
  #define   C_02842C_STENCILFAIL                                        
0xFFFFFFF0
  #define   S_02842C_STENCILZPASS(x)                                    (((unsigned)(x) 
& 0x0F) << 4)
  #define   G_02842C_STENCILZPASS(x)                                    (((x) >> 4) 
& 0x0F)
  #define   C_02842C_STENCILZPASS                                       
0xFFFFFF0F
  #define   S_02842C_STENCILZFAIL(x)                                    (((unsigned)(x) 
& 0x0F) << 8)
  #define   G_02842C_STENCILZFAIL(x)                                    (((x) >> 8) 
& 0x0F)
  #define   C_02842C_STENCILZFAIL                                       
0xFFFFF0FF
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 6118b8076f1..490a3714836 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -1026,24 +1026,25 @@ struct pipe_screen *radeonsi_screen_create(struct 
radeon_winsys *ws,
        sscreen->has_msaa_sample_loc_bug = (sscreen->info.family >= CHIP_POLARIS10 
&&
                                            sscreen->info.family <= 
CHIP_POLARIS12) ||
                                           sscreen->info.family == CHIP_VEGA10 
||
                                           sscreen->info.family == CHIP_RAVEN;
        sscreen->has_ls_vgpr_init_bug = sscreen->info.family == CHIP_VEGA10 ||
                                        sscreen->info.family == CHIP_RAVEN;
if (sscreen->debug_flags & DBG(DPBB)) {
                sscreen->dpbb_allowed = true;
        } else {
-               /* Only enable primitive binning on Raven by default. */
+               /* Only enable primitive binning on APUs by default. */
                /* TODO: Investigate if binning is profitable on Vega12. */
-               sscreen->dpbb_allowed = sscreen->info.family == CHIP_RAVEN &&
-                                       !(sscreen->debug_flags & DBG(NO_DPBB));
+               sscreen->dpbb_allowed = !(sscreen->debug_flags & DBG(NO_DPBB)) &&
+                                       (sscreen->info.family == CHIP_RAVEN ||
+                                        sscreen->info.family == CHIP_RAVEN2);
        }
if (sscreen->debug_flags & DBG(DFSM)) {
                sscreen->dfsm_allowed = sscreen->dpbb_allowed;
        } else {
                sscreen->dfsm_allowed = sscreen->dpbb_allowed &&
                                        !(sscreen->debug_flags & DBG(NO_DFSM));
        }
/* While it would be nice not to have this flag, we are constrained
@@ -1056,21 +1057,22 @@ struct pipe_screen *radeonsi_screen_create(struct 
radeon_winsys *ws,
         * always disable it.
         */
        if (sscreen->info.family == CHIP_STONEY ||
            sscreen->info.chip_class >= GFX9) {
                sscreen->has_rbplus = true;
sscreen->rbplus_allowed =
                        !(sscreen->debug_flags & DBG(NO_RB_PLUS)) &&
                        (sscreen->info.family == CHIP_STONEY ||
                         sscreen->info.family == CHIP_VEGA12 ||
-                        sscreen->info.family == CHIP_RAVEN);
+                        sscreen->info.family == CHIP_RAVEN ||
+                        sscreen->info.family == CHIP_RAVEN2);
        }
sscreen->dcc_msaa_allowed =
                !(sscreen->debug_flags & DBG(NO_DCC_MSAA));
sscreen->cpdma_prefetch_writes_memory = sscreen->info.chip_class <= VI; (void) mtx_init(&sscreen->shader_parts_mutex, mtx_plain);
        sscreen->use_monolithic_shaders =
                (sscreen->debug_flags & DBG(MONOLITHIC_SHADERS)) != 0;
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 43d76d19916..0293bdfa791 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -113,21 +113,22 @@ static void si_emit_cb_render_state(struct si_context 
*sctx)
                                  blend &&
                                  blend->blend_enable_4bit & cb_target_mask &&
                                  sctx->framebuffer.nr_samples >= 2;
                unsigned watermark = 
sctx->framebuffer.dcc_overwrite_combiner_watermark;
radeon_opt_set_context_reg(
                                sctx, R_028424_CB_DCC_CONTROL,
                                SI_TRACKED_CB_DCC_CONTROL,
                                
S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(1) |
                                
S_028424_OVERWRITE_COMBINER_WATERMARK(watermark) |
-                               S_028424_OVERWRITE_COMBINER_DISABLE(oc_disable));
+                               S_028424_OVERWRITE_COMBINER_DISABLE(oc_disable) |
+                               S_028424_DISABLE_CONSTANT_ENCODE_REG(sctx->family == CHIP_RAVEN2));
        }
/* RB+ register settings. */
        if (sctx->screen->rbplus_allowed) {
                unsigned spi_shader_col_format =
                        sctx->ps_shader.cso ?
                        
sctx->ps_shader.current->key.part.ps.epilog.spi_shader_col_format : 0;
                unsigned sx_ps_downconvert = 0;
                unsigned sx_blend_opt_epsilon = 0;
                unsigned sx_blend_opt_control = 0;
@@ -5093,20 +5094,21 @@ static void si_init_config(struct si_context *sctx)
                unsigned num_se = sscreen->info.max_se;
                unsigned pc_lines = 0;
switch (sctx->family) {
                case CHIP_VEGA10:
                case CHIP_VEGA12:
                case CHIP_VEGA20:
                        pc_lines = 4096;
                        break;
                case CHIP_RAVEN:
+               case CHIP_RAVEN2:
                        pc_lines = 1024;
                        break;
                default:
                        assert(0);
                }
si_pm4_set_reg(pm4, R_028C48_PA_SC_BINNER_CNTL_1,
                               S_028C48_MAX_ALLOC_COUNT(MIN2(128, pc_lines / (4 
* num_se))) |
                               S_028C48_MAX_PRIM_PER_BATCH(1023));
                si_pm4_set_reg(pm4, 
R_028C4C_PA_SC_CONSERVATIVE_RASTERIZATION_CNTL,
diff --git a/src/gallium/drivers/radeonsi/si_state_binning.c b/src/gallium/drivers/radeonsi/si_state_binning.c
index 70c129242d1..3516e561282 100644
--- a/src/gallium/drivers/radeonsi/si_state_binning.c
+++ b/src/gallium/drivers/radeonsi/si_state_binning.c
@@ -400,20 +400,21 @@ void si_emit_dpbb_state(struct si_context *sctx)
        /* Tunable parameters. Also test with DFSM enabled/disabled. */
        unsigned context_states_per_bin; /* allowed range: [0, 5] */
        unsigned persistent_states_per_bin; /* allowed range: [0, 31] */
        unsigned fpovs_per_batch; /* allowed range: [0, 255], 0 = unlimited */
switch (sctx->family) {
        case CHIP_VEGA10:
        case CHIP_VEGA12:
        case CHIP_VEGA20:
        case CHIP_RAVEN:
+       case CHIP_RAVEN2:
                /* Tuned for Raven. Vega might need different values. */
                context_states_per_bin = 5;
                persistent_states_per_bin = 31;
                fpovs_per_batch = 63;
                break;
        default:
                assert(0);
        }
/* Emit registers. */

_______________________________________________
mesa-dev mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to