Hi Stan,

Please find my comments inline


On Mon, 2022-04-11 at 19:25 +0300, Stanislav Lisovskiy wrote:
> Whenever we are not able to get enough timeslots
> for required PBN, let's try to allocate those
> using DSC, just same way as we do for SST.
> 
> v2: Removed intel_dp_mst_dsc_compute_config and refactored
>     intel_dp_dsc_compute_config to support timeslots as a
>     parameter(Ville Syrjälä)
> 
> Signed-off-by: Stanislav Lisovskiy <[email protected]>
> ---
>  drivers/gpu/drm/i915/display/intel_dp.c     |  75 +++++-----
>  drivers/gpu/drm/i915/display/intel_dp.h     |  17 +++
>  drivers/gpu/drm/i915/display/intel_dp_mst.c | 143 ++++++++++++++++++++
>  3 files changed, 191 insertions(+), 44 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/display/intel_dp.c 
> b/drivers/gpu/drm/i915/display/intel_dp.c
> index 972c9ed46829..f5477f1bf622 100644
> --- a/drivers/gpu/drm/i915/display/intel_dp.c
> +++ b/drivers/gpu/drm/i915/display/intel_dp.c
> @@ -114,7 +114,6 @@ bool intel_dp_is_edp(struct intel_dp *intel_dp)
>  }
>  
>  static void intel_dp_unset_edid(struct intel_dp *intel_dp);
> -static int intel_dp_dsc_compute_bpp(struct intel_dp *intel_dp, u8 
> dsc_max_bpc);
>  
>  /* Is link rate UHBR and thus 128b/132b? */
>  bool intel_dp_is_uhbr(const struct intel_crtc_state *crtc_state)
> @@ -640,11 +639,12 @@ small_joiner_ram_size_bits(struct drm_i915_private 
> *i915)
>               return 6144 * 8;
>  }
>  
> -static u16 intel_dp_dsc_get_output_bpp(struct drm_i915_private *i915,
> -                                    u32 link_clock, u32 lane_count,
> -                                    u32 mode_clock, u32 mode_hdisplay,
> -                                    bool bigjoiner,
> -                                    u32 pipe_bpp)
> +u16 intel_dp_dsc_get_output_bpp(struct drm_i915_private *i915,
> +                             u32 link_clock, u32 lane_count,
> +                             u32 mode_clock, u32 mode_hdisplay,
> +                             bool bigjoiner,
> +                             u32 pipe_bpp,
> +                             u32 timeslots)
>  {
>       u32 bits_per_pixel, max_bpp_small_joiner_ram;
>       int i;
> @@ -656,7 +656,7 @@ static u16 intel_dp_dsc_get_output_bpp(struct 
> drm_i915_private *i915,
>        * for MST -> TimeSlotsPerMTP has to be calculated
>        */
>       bits_per_pixel = (link_clock * lane_count * 8) /
> -                      intel_dp_mode_to_fec_clock(mode_clock);
> +                      (intel_dp_mode_to_fec_clock(mode_clock) * timeslots);
>       drm_dbg_kms(&i915->drm, "Max link bpp: %u\n", bits_per_pixel);
>  
>       /* Small Joiner Check: output bpp <= joiner RAM (bits) / Horiz. width */
> @@ -710,9 +710,9 @@ static u16 intel_dp_dsc_get_output_bpp(struct 
> drm_i915_private *i915,
>       return bits_per_pixel << 4;
>  }
>  
> -static u8 intel_dp_dsc_get_slice_count(struct intel_dp *intel_dp,
> -                                    int mode_clock, int mode_hdisplay,
> -                                    bool bigjoiner)
> +u8 intel_dp_dsc_get_slice_count(struct intel_dp *intel_dp,
> +                             int mode_clock, int mode_hdisplay,
> +                             bool bigjoiner)
>  {
>       struct drm_i915_private *i915 = dp_to_i915(intel_dp);
>       u8 min_slice_count, i;
> @@ -919,8 +919,8 @@ intel_dp_mode_valid_downstream(struct intel_connector 
> *connector,
>       return MODE_OK;
>  }
>  
> -static bool intel_dp_need_bigjoiner(struct intel_dp *intel_dp,
> -                                 int hdisplay, int clock)
> +bool intel_dp_need_bigjoiner(struct intel_dp *intel_dp,
> +                          int hdisplay, int clock)
>  {
>       struct drm_i915_private *i915 = dp_to_i915(intel_dp);
>  
> @@ -1007,7 +1007,7 @@ intel_dp_mode_valid(struct drm_connector *_connector,
>                                                           target_clock,
>                                                           mode->hdisplay,
>                                                           bigjoiner,
> -                                                         pipe_bpp) >> 4;
> +                                                         pipe_bpp, 1) >> 4;
>                       dsc_slice_count =
>                               intel_dp_dsc_get_slice_count(intel_dp,
>                                                            target_clock,
> @@ -1311,7 +1311,7 @@ intel_dp_compute_link_config_wide(struct intel_dp 
> *intel_dp,
>       return -EINVAL;
>  }
>  
> -static int intel_dp_dsc_compute_bpp(struct intel_dp *intel_dp, u8 
> max_req_bpc)
> +int intel_dp_dsc_compute_bpp(struct intel_dp *intel_dp, u8 max_req_bpc)
>  {
>       struct drm_i915_private *i915 = dp_to_i915(intel_dp);
>       int i, num_bpc;
> @@ -1401,10 +1401,11 @@ static int intel_dp_dsc_compute_params(struct 
> intel_encoder *encoder,
>       return drm_dsc_compute_rc_parameters(vdsc_cfg);
>  }
>  
> -static int intel_dp_dsc_compute_config(struct intel_dp *intel_dp,
> -                                    struct intel_crtc_state *pipe_config,
> -                                    struct drm_connector_state *conn_state,
> -                                    struct link_config_limits *limits)
> +int intel_dp_dsc_compute_config(struct intel_dp *intel_dp,
> +                             struct intel_crtc_state *pipe_config,
> +                             struct drm_connector_state *conn_state,
> +                             struct link_config_limits *limits,
> +                             int timeslots)
>  {
>       struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
>       struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev);
> @@ -1455,7 +1456,8 @@ static int intel_dp_dsc_compute_config(struct intel_dp 
> *intel_dp,
>                                                   adjusted_mode->crtc_clock,
>                                                   
> adjusted_mode->crtc_hdisplay,
>                                                   
> pipe_config->bigjoiner_pipes,
> -                                                 pipe_bpp);
> +                                                 pipe_bpp,
> +                                                 timeslots);
>               dsc_dp_slice_count =
>                       intel_dp_dsc_get_slice_count(intel_dp,
>                                                    adjusted_mode->crtc_clock,
> @@ -1467,41 +1469,26 @@ static int intel_dp_dsc_compute_config(struct 
> intel_dp *intel_dp,
>                       return -EINVAL;
>               }
>               pipe_config->dsc.compressed_bpp = min_t(u16,
> -                                                            
> dsc_max_output_bpp >> 4,
> -                                                            
> pipe_config->pipe_bpp);
> +                                                     dsc_max_output_bpp >> 4,
> +                                                     pipe_config->pipe_bpp);
>               pipe_config->dsc.slice_count = dsc_dp_slice_count;
> +             drm_dbg_kms(&dev_priv->drm, "DSC: compressed bpp %d slice count 
> %d\n",
> +                         pipe_config->dsc.compressed_bpp,
> +                         pipe_config->dsc.slice_count);
>       }
> -
> -     /* As of today we support DSC for only RGB */
> -     if (intel_dp->force_dsc_bpp) {
> -             if (intel_dp->force_dsc_bpp >= 8 &&
> -                 intel_dp->force_dsc_bpp < pipe_bpp) {
> -                     drm_dbg_kms(&dev_priv->drm,
> -                                 "DSC BPP forced to %d",
> -                                 intel_dp->force_dsc_bpp);
> -                     pipe_config->dsc.compressed_bpp =
> -                                             intel_dp->force_dsc_bpp;
> -             } else {
> -                     drm_dbg_kms(&dev_priv->drm,
> -                                 "Invalid DSC BPP %d",
> -                                 intel_dp->force_dsc_bpp);
> -             }
> -     }
> -
>       /*
>        * VDSC engine operates at 1 Pixel per clock, so if peak pixel rate
>        * is greater than the maximum Cdclock and if slice count is even
>        * then we need to use 2 VDSC instances.
>        */
> -     if (adjusted_mode->crtc_clock > dev_priv->max_cdclk_freq ||
> -         pipe_config->bigjoiner_pipes) {
> -             if (pipe_config->dsc.slice_count < 2) {
> +     if (adjusted_mode->crtc_clock > dev_priv->max_cdclk_freq) {
> +             if (pipe_config->dsc.slice_count > 1) {
> +                     pipe_config->dsc.dsc_split = true;
> +             } else {
>                       drm_dbg_kms(&dev_priv->drm,
>                                   "Cannot split stream to use 2 VDSC 
> instances\n");
>                       return -EINVAL;
>               }
> -
> -             pipe_config->dsc.dsc_split = true;
>       }
>  
>       ret = intel_dp_dsc_compute_params(&dig_port->base, pipe_config);
> @@ -1590,7 +1577,7 @@ intel_dp_compute_link_config(struct intel_encoder 
> *encoder,
>                           str_yes_no(ret), str_yes_no(joiner_needs_dsc),
>                           str_yes_no(intel_dp->force_dsc_en));
>               ret = intel_dp_dsc_compute_config(intel_dp, pipe_config,
> -                                               conn_state, &limits);
> +                                               conn_state, &limits, 1);
>               if (ret < 0)
>                       return ret;
>       }
> diff --git a/drivers/gpu/drm/i915/display/intel_dp.h 
> b/drivers/gpu/drm/i915/display/intel_dp.h
> index d457e17bdc57..4c0ad3158ee7 100644
> --- a/drivers/gpu/drm/i915/display/intel_dp.h
> +++ b/drivers/gpu/drm/i915/display/intel_dp.h
> @@ -55,6 +55,11 @@ void intel_dp_encoder_flush_work(struct drm_encoder 
> *encoder);
>  int intel_dp_compute_config(struct intel_encoder *encoder,
>                           struct intel_crtc_state *pipe_config,
>                           struct drm_connector_state *conn_state);
> +int intel_dp_dsc_compute_config(struct intel_dp *intel_dp,
> +                             struct intel_crtc_state *pipe_config,
> +                             struct drm_connector_state *conn_state,
> +                             struct link_config_limits *limits,
> +                             int timeslots);
>  bool intel_dp_is_edp(struct intel_dp *intel_dp);
>  bool intel_dp_is_uhbr(const struct intel_crtc_state *crtc_state);
>  bool intel_dp_is_port_edp(struct drm_i915_private *dev_priv, enum port port);
> @@ -94,6 +99,18 @@ void intel_read_dp_sdp(struct intel_encoder *encoder,
>                      struct intel_crtc_state *crtc_state,
>                      unsigned int type);
>  bool intel_digital_port_connected(struct intel_encoder *encoder);
> +int intel_dp_dsc_compute_bpp(struct intel_dp *intel_dp, u8 dsc_max_bpc);
> +u16 intel_dp_dsc_get_output_bpp(struct drm_i915_private *i915,
> +                             u32 link_clock, u32 lane_count,
> +                             u32 mode_clock, u32 mode_hdisplay,
> +                             bool bigjoiner,
> +                             u32 pipe_bpp,
> +                             u32 timeslots);
> +u8 intel_dp_dsc_get_slice_count(struct intel_dp *intel_dp,
> +                             int mode_clock, int mode_hdisplay,
> +                             bool bigjoiner);
> +bool intel_dp_need_bigjoiner(struct intel_dp *intel_dp,
> +                          int hdisplay, int clock);
>  
>  static inline unsigned int intel_dp_unused_lane_mask(int lane_count)
>  {
> diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c
> b/drivers/gpu/drm/i915/display/intel_dp_mst.c
> index 061b277e5ce7..550fcd380487 100644
> --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c
> +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c
> @@ -99,6 +99,82 @@ static int intel_dp_mst_compute_link_config(struct 
> intel_encoder *encoder,
>       return 0;
>  }
>  
> +static int intel_dp_dsc_mst_compute_link_config(struct intel_encoder 
> *encoder,
> +                                             struct intel_crtc_state 
> *crtc_state,
> +                                             struct drm_connector_state 
> *conn_state,
> +                                             struct link_config_limits 
> *limits)
> +{
> +     struct drm_atomic_state *state = crtc_state->uapi.state;
> +     struct intel_dp_mst_encoder *intel_mst = enc_to_mst(encoder);
> +     struct intel_dp *intel_dp = &intel_mst->primary->dp;
> +     struct intel_connector *connector =
> +             to_intel_connector(conn_state->connector);
> +     struct drm_i915_private *i915 = to_i915(connector->base.dev);
> +     const struct drm_display_mode *adjusted_mode =
> +             &crtc_state->hw.adjusted_mode;
> +     bool constant_n = drm_dp_has_quirk(&intel_dp->desc,
> +                                        DP_DPCD_QUIRK_CONSTANT_N);
> +     int bpp, slots = -EINVAL;
> +     int i, num_bpc;
> +     u8 dsc_bpc[3] = {0};
> +     int min_bpp, max_bpp;
> +     u8 dsc_max_bpc;
> +
> +     crtc_state->lane_count = limits->max_lane_count;
> +     crtc_state->port_clock = limits->max_rate;
> +
> +     /* Max DSC Input BPC for ICL is 10 and for TGL+ is 12 */
> +     if (DISPLAY_VER(i915) >= 12)
> +             dsc_max_bpc = min_t(u8, 12, conn_state->max_requested_bpc);
> +     else
> +             dsc_max_bpc = min_t(u8, 10, conn_state->max_requested_bpc);
> +
> +     max_bpp = min_t(u8, dsc_max_bpc * 3, limits->max_bpp);
> +     min_bpp = limits->min_bpp;
> +
> +     num_bpc = drm_dp_dsc_sink_supported_input_bpcs(intel_dp->dsc_dpcd,
> +                                                    dsc_bpc);
> +     for (i = 0; i < num_bpc; i++) {
> +             if (max_bpp >= dsc_bpc[i] * 3)
> +                     if (min_bpp > dsc_bpc[i] * 3)
> +                             min_bpp = dsc_bpc[i] * 3;
> +     }
> +     drm_dbg_kms(&i915->drm, "DSC Sink supported min bpp %d max bpp %d\n",
> +                 min_bpp, max_bpp);
> +     for (bpp = max_bpp; bpp >= min_bpp; bpp -= 2 * 3) {
> +             crtc_state->pbn = 
> drm_dp_calc_pbn_mode(adjusted_mode->crtc_clock,
> +                                                    bpp << 4,
> +                                                    true);
> +
> +             slots = drm_dp_atomic_find_vcpi_slots(state, &intel_dp->mst_mgr,
> +                                                   connector->port,
> +                                                   crtc_state->pbn, 0);
> +
> +             drm_dbg_kms(&i915->drm, "Trying bpp %d got %d pbn %d slots\n",
> +                         bpp, crtc_state->pbn, slots);
> +
> +             if (slots == -EDEADLK)
> +                     return slots;
> +             if (slots >= 0)
> +                     break;
> +     }
> +
> +     if (slots < 0) {
> +             drm_dbg_kms(&i915->drm, "failed finding vcpi slots:%d\n",
> +                         slots);
> +             return slots;
> +     }
> +
> +     intel_link_compute_m_n(crtc_state->pipe_bpp,
> +                            crtc_state->lane_count,
> +                            adjusted_mode->crtc_clock,
> +                            crtc_state->port_clock,
> +                            &crtc_state->dp_m_n,
> +                            constant_n, crtc_state->fec_enable);
> +     crtc_state->dp_m_n.tu = slots;
> +
> +     return 0;
> +}
>  static int intel_dp_mst_update_slots(struct intel_encoder *encoder,
>                                    struct intel_crtc_state *crtc_state,
>                                    struct drm_connector_state *conn_state)
> @@ -175,6 +251,27 @@ static int intel_dp_mst_compute_config(struct 
> intel_encoder *encoder,
>  
>       ret = intel_dp_mst_compute_link_config(encoder, pipe_config,
>                                              conn_state, &limits);

At this point it looks like it doesnt check any available bandwidth limitation!
intel_dp_mst_compute_link_config() calls drm_dp_atomic_find_vcpi_slots() which 
is the basis of the
below logic to decide whether to use DSC. But drm_dp_atomic_find_vcpi_slots() 
doesn't check any bw
limits. The description of this function says "Allocations set by this function 
are not checked
against the bandwidth restraints of @mgr until the driver calls 
drm_dp_mst_atomic_check()."

The VCPI slot check only happens in drm_dp_mst_atomic_check() which is called
from intel_atomic_check() 

So looks like we might need to update this logic to find a proper place for 
this DSC enable/disable
decision!

> +
> +     if (ret == -EDEADLK)
> +             return ret;
> +
> +     /* enable compression if the mode doesn't fit available BW */
> +     drm_dbg_kms(&dev_priv->drm, "Force DSC en = %d\n", 
> intel_dp->force_dsc_en);
> +     if (ret || intel_dp->force_dsc_en) {
> +             /*
> +              * Try to get at least some timeslots and then see, if
> +              * we can fit there with DSC.
> +              */
> +             ret = intel_dp_dsc_mst_compute_link_config(encoder, pipe_config,
> +                                                        conn_state, &limits);
> +             if (ret < 0)
> +                     return ret;
> +
> +             ret = intel_dp_dsc_compute_config(intel_dp, pipe_config,
> +                                               conn_state, &limits,
> +                                               pipe_config->dp_m_n.tu);
> +     }
> +
>       if (ret)
>               return ret;
>  
> @@ -713,6 +810,10 @@ intel_dp_mst_mode_valid_ctx(struct drm_connector 
> *connector,
>       int max_dotclk = to_i915(connector->dev)->max_dotclk_freq;
>       int max_rate, mode_rate, max_lanes, max_link_clock;
>       int ret;
> +     bool dsc = false, bigjoiner = false;
> +     u16 dsc_max_output_bpp = 0;
> +     u8 dsc_slice_count = 0;
> +     int target_clock = mode->clock;
>  
>       if (drm_connector_is_unregistered(connector)) {
>               *status = MODE_ERROR;
> @@ -750,6 +851,48 @@ intel_dp_mst_mode_valid_ctx(struct drm_connector 
> *connector,
>               return 0;
>       }
>  
> +     if (intel_dp_need_bigjoiner(intel_dp, mode->hdisplay, target_clock)) {
> +             bigjoiner = true;
> +             max_dotclk *= 2;
> +     }
> +
> +     if (DISPLAY_VER(dev_priv) >= 10 &&
> +         drm_dp_sink_supports_dsc(intel_dp->dsc_dpcd)) {
> +             /*
> +              * TBD pass the connector BPC,
> +              * for now U8_MAX so that max BPC on that platform would be 
> picked
> +              */
> +             int pipe_bpp = intel_dp_dsc_compute_bpp(intel_dp, U8_MAX);
> +
> +             if (drm_dp_sink_supports_fec(intel_dp->fec_capable)) {
> +                     dsc_max_output_bpp =
> +                             intel_dp_dsc_get_output_bpp(dev_priv,
> +                                                         max_link_clock,
> +                                                         max_lanes,
> +                                                         target_clock,
> +                                                         mode->hdisplay,
> +                                                         bigjoiner,
> +                                                         pipe_bpp, 1) >> 4;
> +                     dsc_slice_count =
> +                             intel_dp_dsc_get_slice_count(intel_dp,
> +                                                          target_clock,
> +                                                          mode->hdisplay,
> +                                                          bigjoiner);
> +             }
> +
> +             dsc = dsc_max_output_bpp && dsc_slice_count;
> +     }
> +
> +     /*
> +      * Big joiner configuration needs DSC for TGL which is not true for
> +      * XE_LPD where uncompressed joiner is supported.
> +      */
> +     if (DISPLAY_VER(dev_priv) < 13 && bigjoiner && !dsc)
> +             return MODE_CLOCK_HIGH;
> +
> +     if (mode_rate > max_rate && !dsc)
> +             return MODE_CLOCK_HIGH;
> +
>       *status = intel_mode_valid_max_plane_size(dev_priv, mode, false);
>       return 0;
>  }

Reply via email to