[ANNOUNCE] weston 13.0.94

2024-08-30 Thread Marius Vlad
Hi all,

This is the RC2 release for Weston 14.0.0.

Changelog since RC1:

Marius Vlad (2):
  libweston/color-management: Add fallback for static_assert
  build: bump to version 13.0.94 for the RC2 release

git tag: 13.0.94

https://gitlab.freedesktop.org/wayland/weston/-/releases/13.0.94/downloads/weston-13.0.94.tar.xz
SHA256: 77d6b9dff7657362ba58f9af521d214f135024ff389546d718a2a34d40236f18  
weston-13.0.94.tar.xz
SHA512: 
88d58fe4a38eb1032d516a1e9a5cff8cd72ee89b867129c33d3384a9261ed316f93e98672df71930b8767257689ea5d289bb7e4fd272738199fa0684b5112f23
  weston-13.0.94.tar.xz
PGP:
https://gitlab.freedesktop.org/wayland/weston/-/releases/13.0.94/downloads/weston-13.0.94.tar.xz.sig


signature.asc
Description: PGP signature


[ANNOUNCE] weston 13.0.94

2024-08-30 Thread Marius Vlad
Hi all,

This is the RC2 release for Weston 14.0.0.

Changelog since RC1:

Marius Vlad (2):
  libweston/color-management: Add fallback for static_assert
  build: bump to version 13.0.94 for the RC2 release

git tag: 13.0.94

https://gitlab.freedesktop.org/wayland/weston/-/releases/13.0.94/downloads/weston-13.0.94.tar.xz
SHA256: 77d6b9dff7657362ba58f9af521d214f135024ff389546d718a2a34d40236f18  
weston-13.0.94.tar.xz
SHA512: 
88d58fe4a38eb1032d516a1e9a5cff8cd72ee89b867129c33d3384a9261ed316f93e98672df71930b8767257689ea5d289bb7e4fd272738199fa0684b5112f23
  weston-13.0.94.tar.xz
PGP:
https://gitlab.freedesktop.org/wayland/weston/-/releases/13.0.94/downloads/weston-13.0.94.tar.xz.sig


signature.asc
Description: PGP signature


Re: [PATCH v5 02/44] drm/vkms: Round fixp2int conversion in lerp_u16

2024-08-30 Thread Louis Chauvet
Le 19/08/24 - 16:56, Harry Wentland a écrit :
> fixp2int always rounds down, fixp2int_ceil rounds up. We need
> the new fixp2int_round.
> 
> Signed-off-by: Harry Wentland 
> ---
>  drivers/gpu/drm/vkms/vkms_composer.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/vkms/vkms_composer.c 
> b/drivers/gpu/drm/vkms/vkms_composer.c
> index e7441b227b3c..3d6785d081f2 100644
> --- a/drivers/gpu/drm/vkms/vkms_composer.c
> +++ b/drivers/gpu/drm/vkms/vkms_composer.c
> @@ -98,7 +98,7 @@ static u16 lerp_u16(u16 a, u16 b, s64 t)
>  
>   s64 delta = drm_fixp_mul(b_fp - a_fp,  t);
>  
> - return drm_fixp2int(a_fp + delta);
> + return drm_fixp2int_round(a_fp + delta);
>  }
>  
>  static s64 get_lut_index(const struct vkms_color_lut *lut, u16 channel_value)
> -- 
> 2.46.0
> 

Reviewed-by: Louis Chauvet 


Re: [PATCH v5 14/44] drm/vkms: Add enumerated 1D curve colorop

2024-08-30 Thread Louis Chauvet
Le 19/08/24 - 16:56, Harry Wentland a écrit :

> +static int vkms_initialize_color_pipeline(struct drm_plane *plane, struct 
> drm_prop_enum_list *list)
> +{
> +
> + struct drm_colorop *op, *prev_op;
> + struct drm_device *dev = plane->dev;
> + int ret;
> +
> + /* 1st op: 1d curve */
> + op = kzalloc(sizeof(struct drm_colorop), GFP_KERNEL);
> + if (!op) {
> + DRM_ERROR("KMS: Failed to allocate colorop\n");
> + return -ENOMEM;
> + }
> +
> + ret = drm_colorop_curve_1d_init(dev, op, plane, supported_tfs);
> + if (ret)
> + return ret;
> +
> + list->type = op->base.id;
> + list->name = kasprintf(GFP_KERNEL, "Color Pipeline %d", op->base.id);
> +
> + prev_op = op;
> +
> + /* 2nd op: 1d curve */
> + op = kzalloc(sizeof(struct drm_colorop), GFP_KERNEL);
> + if (!op) {
> + DRM_ERROR("KMS: Failed to allocate colorop\n");
> + return -ENOMEM;
> + }

There is no need to cleanup previously created colorop here?

> + ret = drm_colorop_curve_1d_init(dev, op, plane, supported_tfs);
> + if (ret)
> + return ret;

ditto?

> + drm_colorop_set_next_property(prev_op, op);
> +
> + return 0;
> +}
> +
> +int vkms_initialize_colorops(struct drm_plane *plane)
> +{
> + struct drm_prop_enum_list pipelines[MAX_COLOR_PIPELINES];
> + int len = 0;
> + int ret;
> +
> + /* Add color pipeline */
> + ret = vkms_initialize_color_pipeline(plane, &(pipelines[len]));
> + if (ret)
> + return ret;
> + len++;

The usage of len is not very clear, can you maybe remove it? Or do you 
plan to use it for instanciating multiple pipelines?

struct drm_prop_enum_list pipeline;
ret = vkms_initialize_color_pipeline(plane, &pipeline);
drm_plane_create_color_pipeline_property(plane, &pipeline, 1);

> + /* Create COLOR_PIPELINE property and attach */
> + drm_plane_create_color_pipeline_property(plane, pipelines, len);

This function can fail, can you also test the result here?

> +
> + return 0;
> +}
> diff --git a/drivers/gpu/drm/vkms/vkms_composer.c 
> b/drivers/gpu/drm/vkms/vkms_composer.c
> index 3ecda70c2b55..bc116d16e378 100644
> --- a/drivers/gpu/drm/vkms/vkms_composer.c
> +++ b/drivers/gpu/drm/vkms/vkms_composer.c
> @@ -12,6 +12,7 @@
>  #include 
>  
>  #include "vkms_drv.h"
> +#include "vkms_luts.h"
>  
>  static u16 pre_mul_blend_channel(u16 src, u16 dst, u16 alpha)
>  {
> @@ -163,6 +164,53 @@ static void apply_lut(const struct vkms_crtc_state 
> *crtc_state, struct line_buff
>   }
>  }
>  
> +static void apply_colorop(struct pixel_argb_u16 *pixel, struct drm_colorop 
> *colorop)
> +{
> + struct drm_colorop_state *colorop_state = colorop->state;
> +
> + if (colorop->type == DRM_COLOROP_1D_CURVE) {
> + switch (colorop_state->curve_1d_type) {
> + case DRM_COLOROP_1D_CURVE_SRGB_INV_EOTF:
> + pixel->r = 
> apply_lut_to_channel_value(&srgb_inv_eotf, pixel->r, LUT_RED);
> + pixel->g = 
> apply_lut_to_channel_value(&srgb_inv_eotf, pixel->g, LUT_GREEN);
> + pixel->b = 
> apply_lut_to_channel_value(&srgb_inv_eotf, pixel->b, LUT_BLUE);
> + break;
> + case DRM_COLOROP_1D_CURVE_SRGB_EOTF:
> + pixel->r = 
> apply_lut_to_channel_value(&srgb_eotf, pixel->r, LUT_RED);
> + pixel->g = 
> apply_lut_to_channel_value(&srgb_eotf, pixel->g, LUT_GREEN);
> + pixel->b = 
> apply_lut_to_channel_value(&srgb_eotf, pixel->b, LUT_BLUE);
> + break;
> + default:
> + DRM_DEBUG_DRIVER("unkown colorop 1D curve type 
> %d\n", colorop_state->curve_1d_type);

Maybe add a ONCE/RATELIMITED here, this function is called for every 
pixel.

> + break;
> + }
> + }
> +
> +}
> +
> +static void pre_blend_color_transform(const struct vkms_plane_state 
> *plane_state, struct line_buffer *output_buffer)
> +{
> + struct drm_colorop *colorop = plane_state->base.base.color_pipeline;
> +
> + while (colorop) {
> + struct drm_colorop_state *colorop_state;
> +
> + if (!colorop)
> + return;
> +
> + colorop_state = colorop->state;
> +
> + if (!colorop_state)
> + return;
> +
> + for (size_t x = 0; x < output_buffer->n_pixels; x++)
> + if (!colorop_state->bypass)

Maybe we can swap the for and the if? I don't know the performance impact 
is huge, but it feel more logical to check bypass before iterating.

> + apply_colorop(&output_buffer->pixels[x], 
> colorop);
> +
> + colorop = colorop->next;
> + }
> +}
> +
>  /**
>   * blend - blend the pixels fr

Re: [PATCH v5 00/44] Color Pipeline API w/ VKMS

2024-08-30 Thread Xaver Hugl
Hi,

I have a WIP implementation of this for KWin at
https://invent.kde.org/plasma/kwin/-/commits/work/zamundaaa/drm-colorop.
It maps KWin's color pipeline to the drm one to get (primary plane
only, for now) direct scanout of HDR content on SDR displays while
doing tone mapping (or SDR on HDR, but that's been possible before).
It currently uses the two 1D custom luts and the 3D lut for this; once
I add support for named 1D curves, it should also make use of that and
the matrix. I haven't tested the Intel version yet, but if it can
match the pipeline, it should work the same there.

In testing your amd-color-pipeline-v5 branch I get two crashes in the
kernel though: https://invent.kde.org/-/snippets/3217
The null pointer dereference only happens sometimes, seemingly
randomly, but if I make KWin generate new blobs each frame, I can 100%
reliably reproduce the list_add corruption problem by just opening
glxgears in fullscreen while the color profile of the display is set
to "built in" (which makes it use the 3D LUT for gamut mapping).
Here's a branch that generates the color pipeline each frame and
triggers this: 
https://invent.kde.org/plasma/kwin/-/commits/work/zamundaaa/drm-colorop-list-add-corruption



Am Mo., 19. Aug. 2024 um 22:57 Uhr schrieb Harry Wentland
:
>
> This is an RFC set for a color pipeline API, along with implementations
> in VKMS and amdgpu. It is tested with a set of IGT tests that can be
> found at [1]. The IGT tests run a pixel-by-pixel comparison with an
> allowable delta variation as the goal for these transformations is
> perceptual correctness, not complete pixel accuracy.
>
> v5 of this patchset fleshed out documentation for colorops and the
> various defines that are being introduced.
>
> VKMS supports two named transfer function colorops and two matrix
> colorops.
>
> Amdgpu advertises the following pipeline for GPUs with DCN 3 or newer:
>
> 1. 1D Curve EOTF
> 2. 3x4 CTM
> 3. Multiplier
> 4. 1D Curve Inverse EOTF
> 5. 1D LUT
> 6. 3D LUT
> 7. 1D Curve EOTF
> 8. 1D LUT
>
> The supported curves for the 1D Curve type are:
> - sRGB EOTF and its inverse
> - PQ EOTF, scaled to [0.0, 125.0] and its inverse
> - BT.2020/BT.709 OETF and its inverse
>
> Note that the 1st and 5th colorops take the EOTF or Inverse
> OETF while the 3rd colorop takes the Inverse EOTF or OETF.
>
> The 3D LUT is a 17^3 tetrahedrally interpolated LUT but the mechanism
> exists for other drivers to describe their own 3D LUT capability.
>
> This mirrors the color pipeline used by gamescope and presented by
> Melissa Wen, with the exception of the DEGAM LUT, which is not currently
> used. See [1]
> https://indico.freedesktop.org/event/4/contributions/186/attachments/138/218/xdc2023-TheRainbowTreasureMap-MelissaWen.pdf
>
> At this point we're hoping to see gamescope and kwin implementations
> take shape. The existing pipeline should be enough to satisfy the
> gamescope use-cases on the drm_plane.
>
> In order to support YUV we'll need to add COLOR_ENCODING and COLOR_RANGE
> support to the color pipeline. I have sketched these out already but
> don't have it all hooked up yet. This should not hinder adoption of this
> API for gaming use-cases.
>
> We'll also want to advertise IN_FORMATS on a color pipeline as some
> color pipelines won't be able to work for all IN_FORMATS on a plane.
> Again, I have a sketch but no full implementation yet. This is not
> currently required by the AMD color pipeline and could be added after
> the merge of this set.
>
> VKMS patches could still be improved in a few ways, though the
> payoff might be limited and I would rather focus on other work
> at the moment. The most obvious thing to improve would be to
> eliminate the hard-coded LUTs for identity, and sRGB, and replace
> them with fixed-point math instead.
>
> There are plenty of things that I would like to see, but they could
> be added after the merge of this patchset:
>  - COLOR_ENCODING and COLOR_RANGE
>  - IN_FORMATS for a color pipeline
>  - Is it possible to support HW which can't bypass entire pipeline?
>  - Can we do a LOAD / COMMIT model for LUTs (and other properties)?
>  - read-only scaling colorop which defines scaling taps and position
>  - named matrices, for things like converting YUV to RGB
>  - Add custom LUT colorops to VKMS
>
> IGT tests can be found at [1] or on the igt-dev mailing list.
>
> A kernel branch can be found at [2].
>
> I've also rebased Uma and Chaitanya's patches for the Intel color
> pipeline on top of this to show how I envision them to mesh with
> my changes. The relevant branches can be found at [3] for the kernel
> and [4] for IGT. There were some rebase conflicts in i915 and I'm
> not entirely sure I've resolved all of them correctly, but the branch
> compiles and shows my thoughts for the new DRM concepts to support
> Intel's pipeline.
>
> [1] 
> https://gitlab.freedesktop.org/hwentland/igt-gpu-tools/-/tree/amd-color-pipeline-v5
> [2] 
> https://gitlab.freedesktop.org/hwentland/linux/-/tree/a

Re: [PATCH v5 03/44] drm/vkms: Add kunit tests for VKMS LUT handling

2024-08-30 Thread Louis Chauvet
Le 19/08/24 - 16:56, Harry Wentland a écrit :

[...]

> diff --git a/drivers/gpu/drm/vkms/vkms_composer.c 
> b/drivers/gpu/drm/vkms/vkms_composer.c
> index 3d6785d081f2..3ecda70c2b55 100644
> --- a/drivers/gpu/drm/vkms/vkms_composer.c
> +++ b/drivers/gpu/drm/vkms/vkms_composer.c
> @@ -435,3 +435,7 @@ int vkms_set_crc_source(struct drm_crtc *crtc, const char 
> *src_name)
diff --git a/drivers/gpu/drm/vkms/vkms_composer.c 
b/drivers/gpu/drm/vkms/vkms_composer.c
index 3d6785d081f2..3ecda70c2b55 100644
--- a/drivers/gpu/drm/vkms/vkms_composer.c
+++ b/drivers/gpu/drm/vkms/vkms_composer.c
@@ -435,3 +435,7 @@ int vkms_set_crc_source(struct drm_crtc *crtc, const char 
*src_name)
 
return ret;
 }
+
+#ifdef CONFIG_DRM_VKMS_KUNIT_TESTS
+#include "tests/vkms_color_tests.c"
+#endif>  
>   return ret;
>  }
> +
> +#ifdef CONFIG_DRM_VKMS_KUNIT_TESTS
> +#include "tests/vkms_color_tests.c"
> +#endif

This is very strange to include a .c in this file, is it something done a 
lot in the kernel? I can find only one occurence of this pattern in the 
kernel [1], the other tests are in their own modules.

In addition it crate many warning during compilations:
warning: symbol 'test_*' was not declared. Should it be static?

As other tests will be introduced (yuv [2], config [3]), it is maybe 
interesting to introduce a new module as [2] is doing?

[1]: https://elixir.bootlin.com/linux/v6.11-rc5/source/fs/ext4/mballoc.c#L7047
[2]: https://lore.kernel.org/all/20240809-yuv-v10-14-1a7c76416...@bootlin.com/
[3]: 
https://lore.kernel.org/all/20240814-google-remove-crtc-index-from-parameter-v1-15-6e179abf9...@bootlin.com/



Re: [PATCH v5 05/44] drm/colorop: Introduce new drm_colorop mode object

2024-08-30 Thread Louis Chauvet
Le 19/08/24 - 16:56, Harry Wentland a écrit :

[...]

> +#ifndef __DRM_COLOROP_H__
> +#define __DRM_COLOROP_H__
> +
> +#include 
> +#include 
> +#include 
> +
> +/**
> + * struct drm_colorop_state - mutable colorop state
> + */
> +struct drm_colorop_state {
> + /** @colorop: backpointer to the colorop */
> + struct drm_colorop *colorop;
> +
> + /* colorop properties */

Can you add a comment like this, so it is clear that not all the fields 
are valid and only a few of them will be used.

/*
 * Color properties
 *
 * The following fields are not always valid, their usage depends
 * on the colorop type. See their associated comment for more 
 * information.
 */

> +
> + /** @state: backpointer to global drm_atomic_state */
> + struct drm_atomic_state *state;
> +};
> +
> +/**
> + * struct drm_colorop - DRM color operation control structure
> + *
> + * A colorop represents one color operation. They can be chained via
> + * the 'next' pointer to build a color pipeline.
> + */
> +struct drm_colorop {
> + /** @dev: parent DRM device */
> + struct drm_device *dev;
> +
> + /**
> +  * @head:
> +  *
> +  * List of all colorops on @dev, linked from 
> &drm_mode_config.colorop_list.
> +  * Invariant over the lifetime of @dev and therefore does not need
> +  * locking.
> +  */
> + struct list_head head;
> +
> + /**
> +  * @index: Position inside the mode_config.list, can be used as an array
> +  * index. It is invariant over the lifetime of the colorop.
> +  */
> + unsigned index;
> +
> + /** @base: base mode object */
> + struct drm_mode_object base;
> +
> + /**
> +  * @plane:
> +  *
> +  * The plane on which the colorop sits. A drm_colorop is always unique
> +  * to a plane.
> +  */
> + struct drm_plane *plane;
> +
> + /**
> +  * @state:
> +  *
> +  * Current atomic state for this colorop.
> +  *
> +  * This is protected by @mutex. Note that nonblocking atomic commits
> +  * access the current colorop state without taking locks.
> +  */
> + struct drm_colorop_state *state;
> +
> + /* colorop properties */

Maybe the same kind of comment here?

> + /** @properties: property tracking for this colorop */
> + struct drm_object_properties properties;
> +
> +};
> +

[...]

Louis Chauvet


Re: [PATCH v5 00/44] Color Pipeline API w/ VKMS

2024-08-30 Thread Louis Chauvet
Le 19/08/24 - 16:56, Harry Wentland a écrit :
> This is an RFC set for a color pipeline API, along with implementations
> in VKMS and amdgpu. It is tested with a set of IGT tests that can be
> found at [1]. The IGT tests run a pixel-by-pixel comparison with an
> allowable delta variation as the goal for these transformations is
> perceptual correctness, not complete pixel accuracy.
> 
> v5 of this patchset fleshed out documentation for colorops and the
> various defines that are being introduced.
> 
> VKMS supports two named transfer function colorops and two matrix
> colorops.
> 
> Amdgpu advertises the following pipeline for GPUs with DCN 3 or newer:
> 
> 1. 1D Curve EOTF
> 2. 3x4 CTM
> 3. Multiplier
> 4. 1D Curve Inverse EOTF
> 5. 1D LUT
> 6. 3D LUT
> 7. 1D Curve EOTF
> 8. 1D LUT
> 
> The supported curves for the 1D Curve type are:
> - sRGB EOTF and its inverse
> - PQ EOTF, scaled to [0.0, 125.0] and its inverse
> - BT.2020/BT.709 OETF and its inverse
> 
> Note that the 1st and 5th colorops take the EOTF or Inverse
> OETF while the 3rd colorop takes the Inverse EOTF or OETF.
> 
> The 3D LUT is a 17^3 tetrahedrally interpolated LUT but the mechanism
> exists for other drivers to describe their own 3D LUT capability.
> 
> This mirrors the color pipeline used by gamescope and presented by
> Melissa Wen, with the exception of the DEGAM LUT, which is not currently
> used. See [1]
> https://indico.freedesktop.org/event/4/contributions/186/attachments/138/218/xdc2023-TheRainbowTreasureMap-MelissaWen.pdf
> 
> At this point we're hoping to see gamescope and kwin implementations
> take shape. The existing pipeline should be enough to satisfy the
> gamescope use-cases on the drm_plane.
> 
> In order to support YUV we'll need to add COLOR_ENCODING and COLOR_RANGE
> support to the color pipeline. I have sketched these out already but
> don't have it all hooked up yet. This should not hinder adoption of this
> API for gaming use-cases.
> 
> We'll also want to advertise IN_FORMATS on a color pipeline as some
> color pipelines won't be able to work for all IN_FORMATS on a plane.
> Again, I have a sketch but no full implementation yet. This is not
> currently required by the AMD color pipeline and could be added after
> the merge of this set.
> 
> VKMS patches could still be improved in a few ways, though the
> payoff might be limited and I would rather focus on other work
> at the moment. The most obvious thing to improve would be to
> eliminate the hard-coded LUTs for identity, and sRGB, and replace
> them with fixed-point math instead.

Hi!

I reviewed your VKMS patches and added a few comments in your series. This 
series looks very good.

Thanks for this work,
Louis Chauvet
 
> There are plenty of things that I would like to see, but they could
> be added after the merge of this patchset:
>  - COLOR_ENCODING and COLOR_RANGE
>  - IN_FORMATS for a color pipeline
>  - Is it possible to support HW which can't bypass entire pipeline?
>  - Can we do a LOAD / COMMIT model for LUTs (and other properties)?
>  - read-only scaling colorop which defines scaling taps and position
>  - named matrices, for things like converting YUV to RGB
>  - Add custom LUT colorops to VKMS
> 
> IGT tests can be found at [1] or on the igt-dev mailing list.
> 
> A kernel branch can be found at [2].
> 
> I've also rebased Uma and Chaitanya's patches for the Intel color
> pipeline on top of this to show how I envision them to mesh with
> my changes. The relevant branches can be found at [3] for the kernel
> and [4] for IGT. There were some rebase conflicts in i915 and I'm
> not entirely sure I've resolved all of them correctly, but the branch
> compiles and shows my thoughts for the new DRM concepts to support
> Intel's pipeline.
> 
> [1] 
> https://gitlab.freedesktop.org/hwentland/igt-gpu-tools/-/tree/amd-color-pipeline-v5
> [2] 
> https://gitlab.freedesktop.org/hwentland/linux/-/tree/amd-color-pipeline-v5
> [3] 
> https://gitlab.freedesktop.org/hwentland/linux/-/tree/amd-intel-color-pipeline-v5
> [4] 
> https://gitlab.freedesktop.org/hwentland/igt-gpu-tools/-/tree/amd-intel-color-pipeline-v5
> 
> 
> v5:
>  - amdgpu 3D LUT
>  - Don't require BYPASS
>  - update RFC docs and add to TOC tree
>  - add drm_colorop and COLOR_PIPELINE kernel docs (non-RFC)
>  - add amdgpu color pipeline doc
>  - define SIZE property similar to drm_crtc's GAMMA_SIZE
>  - various minor fixes and cleanups
> 
> v4:
>  - Add amdgpu color pipeline (WIP)
>  - Don't block setting of deprecated properties, instead pass client cap
>to atomic check so drivers can ignore these props
>  - Drop IOCTL definitions (Pekka)
>  - Use enum property for colorop TYPE (Pekka)
>  - A few cleanups to the docs (Pekka)
>  - Rework the TYPE enum to name relation to avoid code duplication (Pekka)
>  - Add missing function declarations (Chaitanya Kumar Borah)
>  - Allow setting of NEXT property to NULL in _set_ function (Chaitanya Kumar 
> Borah)
>  - Add helper for creation of pipeline drm_plan

Re: [PATCH v5 18/44] drm/vkms: Use s32 for internal color pipeline precision

2024-08-30 Thread Louis Chauvet
Le 19/08/24 - 16:56, Harry Wentland a écrit :
> Certain operations require us to preserve values below 0.0 and
> above 1.0 (0x0 and 0x respectively in 16 bpc unorm). One
> such operation is a BT709 encoding operation followed by its
> decoding operation, or the reverse.
> 
> We'll use s32 values as intermediate in and outputs of our
> color operations, for the operations where it matters.
>
> For now this won't apply to LUT operations. We might want to
> update those to work on s32 as well, but it's unclear how
> that should work for unorm LUT definitions. We'll revisit
> that once we add LUT + CTM tests.
> 
> In order to allow for this we'll also invert the nesting of our
> colorop processing loops. We now use the pixel iteration loop
> on the outside and the colorop iteration on the inside.

Maybe you can do this inversion on your first commit so it will reduce the 
code change here?

> v4:
>  - Clarify that we're packing 16-bit UNORM into s32, not
>converting values to a different representation (Pekka)
> 
> v3:
>  - Use new colorop->next pointer
> 
> Signed-off-by: Harry Wentland 
> ---
>  drivers/gpu/drm/vkms/vkms_composer.c | 55 ++--
>  drivers/gpu/drm/vkms/vkms_drv.h  |  4 ++
>  2 files changed, 47 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/gpu/drm/vkms/vkms_composer.c 
> b/drivers/gpu/drm/vkms/vkms_composer.c
> index bc116d16e378..6e939d3a6d5c 100644
> --- a/drivers/gpu/drm/vkms/vkms_composer.c
> +++ b/drivers/gpu/drm/vkms/vkms_composer.c
> @@ -164,7 +164,7 @@ static void apply_lut(const struct vkms_crtc_state 
> *crtc_state, struct line_buff
>   }
>  }
>  
> -static void apply_colorop(struct pixel_argb_u16 *pixel, struct drm_colorop 
> *colorop)
> +static void apply_colorop(struct pixel_argb_s32 *pixel, struct drm_colorop 
> *colorop)
>  {
>   struct drm_colorop_state *colorop_state = colorop->state;
>  
> @@ -190,24 +190,55 @@ static void apply_colorop(struct pixel_argb_u16 *pixel, 
> struct drm_colorop *colo
>  
>  static void pre_blend_color_transform(const struct vkms_plane_state 
> *plane_state, struct line_buffer *output_buffer)
>  {
> - struct drm_colorop *colorop = plane_state->base.base.color_pipeline;
> + struct drm_colorop *colorop;
> + struct pixel_argb_s32 pixel;
>  
> - while (colorop) {
> - struct drm_colorop_state *colorop_state;
> + for (size_t x = 0; x < output_buffer->n_pixels; x++) {
>  
> - if (!colorop)
> - return;
> + /*
> +  * Some operations, such as applying a BT709 encoding matrix,
> +  * followed by a decoding matrix, require that we preserve
> +  * values above 1.0 and below 0.0 until the end of the pipeline.
> +  *
> +  * Pack the 16-bit UNORM values into s32 to give us head-room to
> +  * avoid clipping until we're at the end of the pipeline. Clip
> +  * intentionally at the end of the pipeline before packing
> +  * UNORM values back into u16.
> +  */
> + pixel.a = output_buffer->pixels[x].a;
> + pixel.r = output_buffer->pixels[x].r;
> + pixel.g = output_buffer->pixels[x].g;
> + pixel.b = output_buffer->pixels[x].b;
>
> - colorop_state = colorop->state;
> + colorop = plane_state->base.base.color_pipeline;
> + while (colorop) {
> + struct drm_colorop_state *colorop_state;
>  
> - if (!colorop_state)
> - return;
> + if (!colorop)
> + return;

I think this is useless, the while should be sufficient.

> +
> + colorop_state = colorop->state;
> +
> + if (!colorop_state)
> + return;
>  
> - for (size_t x = 0; x < output_buffer->n_pixels; x++)
>   if (!colorop_state->bypass)
> - apply_colorop(&output_buffer->pixels[x], 
> colorop);
> + apply_colorop(&pixel, colorop);
>  
> - colorop = colorop->next;
> + colorop = colorop->next;
> + }
> +
> + /* clamp pixel */
> + pixel.a = max(min(pixel.a, 0x), 0x0);
> + pixel.r = max(min(pixel.r, 0x), 0x0);
> + pixel.g = max(min(pixel.g, 0x), 0x0);
> + pixel.b = max(min(pixel.b, 0x), 0x0);

clamp can't be used here? And can't we store the result directly in 
output_buffer?

> +
> + /* put back to output_buffer */
> + output_buffer->pixels[x].a = pixel.a;
> + output_buffer->pixels[x].r = pixel.r;
> + output_buffer->pixels[x].g = pixel.g;
> + output_buffer->pixels[x].b = pixel.b;
>
>   }
>  }
>  
> diff --git a/drivers/gpu/drm/vkms/vkms_drv.h b/drivers/gpu/drm/vkms/vkms_drv.h
> index 278cf35

Re: [PATCH v5 41/44] drm/colorop: allow non-bypass colorops

2024-08-30 Thread Louis Chauvet
Le 19/08/24 - 16:57, Harry Wentland a écrit :
> Not all HW will be able to do bypass on all color
> operations. Introduce an 'allow_bypass' boolean for
> all colorop init functions and only create the BYPASS
> property when it's true.

You did not change the documentation of struct drm_colorop_state, so can 
we expect state->bypass to be always valid (ie. false when bypass is not 
possible)?

I don't think so, because drm_atomic_helper_colorop_duplicate_state 
/ drm_colorop_reset are setting the bypass to true. Maybe you can add 
something like this?

/**
 * @bypass:
 *
 * When the property BYPASS exists on this colorop, this stores 
 * the requested bypass state: true if colorop shall be bypassed, 
 * false if colorop is enabled.
 */
 
[...]



Re: [PATCH v5 19/44] drm/vkms: add 3x4 matrix in color pipeline

2024-08-30 Thread Louis Chauvet
Le 19/08/24 - 16:56, Harry Wentland a écrit :
> We add two 3x4 matrices into the VKMS color pipeline. The reason
> we're adding matrices is so that we can test that application
> of a matrix and its inverse yields an output equal to the input
> image.
> 
> One complication with the matrix implementation has to do with
> the fact that the matrix entries are in signed-magnitude fixed
> point, whereas the drm_fixed.h implementation uses 2s-complement.
> The latter one is the one that we want for easy addition and
> subtraction, so we convert all entries to 2s-complement.

Is there a reason to use signed-magnitude and not 2s-complement here? I 
did not read the whole amd driver, but it seems that the matrix is always 
converted to fixed point notation (amdgpu_dm_fixpt_from_s3132 in 
amdgpu_dm_color.c). It may reduce the complexity here and in the amd 
driver too.

> 
> Signed-off-by: Harry Wentland 
> ---
>  drivers/gpu/drm/vkms/vkms_colorop.c  | 32 +++-
>  drivers/gpu/drm/vkms/vkms_composer.c | 27 +++
>  2 files changed, 58 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/vkms/vkms_colorop.c 
> b/drivers/gpu/drm/vkms/vkms_colorop.c
> index f61dfde47156..adcb08153a09 100644
> --- a/drivers/gpu/drm/vkms/vkms_colorop.c
> +++ b/drivers/gpu/drm/vkms/vkms_colorop.c
> @@ -37,7 +37,37 @@ static int vkms_initialize_color_pipeline(struct drm_plane 
> *plane, struct drm_pr
>  
>   prev_op = op;
>  
> - /* 2nd op: 1d curve */
> + /* 2nd op: 3x4 matrix */
> + op = kzalloc(sizeof(struct drm_colorop), GFP_KERNEL);
> + if (!op) {
> + DRM_ERROR("KMS: Failed to allocate colorop\n");
> + return -ENOMEM;
> + }

Same as before, don't we leak memory/properties here?

> + ret = drm_colorop_ctm_3x4_init(dev, op, plane);
> + if (ret)
> + return ret;
> +
> + drm_colorop_set_next_property(prev_op, op);
> +
> + prev_op = op;
> +
> + /* 3rd op: 3x4 matrix */
> + op = kzalloc(sizeof(struct drm_colorop), GFP_KERNEL);
> + if (!op) {
> + DRM_ERROR("KMS: Failed to allocate colorop\n");
> + return -ENOMEM;
> + }
> +
> + ret = drm_colorop_ctm_3x4_init(dev, op, plane);
> + if (ret)
> + return ret;
> +
> + drm_colorop_set_next_property(prev_op, op);
> +
> + prev_op = op;
> +
> + /* 4th op: 1d curve */
>   op = kzalloc(sizeof(struct drm_colorop), GFP_KERNEL);
>   if (!op) {
>   DRM_ERROR("KMS: Failed to allocate colorop\n");
> diff --git a/drivers/gpu/drm/vkms/vkms_composer.c 
> b/drivers/gpu/drm/vkms/vkms_composer.c
> index 6e939d3a6d5c..bd1df06ced85 100644
> --- a/drivers/gpu/drm/vkms/vkms_composer.c
> +++ b/drivers/gpu/drm/vkms/vkms_composer.c
> @@ -164,6 +164,30 @@ static void apply_lut(const struct vkms_crtc_state 
> *crtc_state, struct line_buff
>   }
>  }
>  
> +static void apply_3x4_matrix(struct pixel_argb_s32 *pixel, const struct 
> drm_color_ctm_3x4 *matrix)
> +{
> + s64 rf, gf, bf;
> +
> + rf = drm_fixp_mul(drm_sm2fixp(matrix->matrix[0]), 
> drm_int2fixp(pixel->r)) +
> +  drm_fixp_mul(drm_sm2fixp(matrix->matrix[1]), 
> drm_int2fixp(pixel->g)) +
> +  drm_fixp_mul(drm_sm2fixp(matrix->matrix[2]), 
> drm_int2fixp(pixel->b)) +
> +  drm_sm2fixp(matrix->matrix[3]);
> +
> + gf = drm_fixp_mul(drm_sm2fixp(matrix->matrix[4]), 
> drm_int2fixp(pixel->r)) +
> +  drm_fixp_mul(drm_sm2fixp(matrix->matrix[5]), 
> drm_int2fixp(pixel->g)) +
> +  drm_fixp_mul(drm_sm2fixp(matrix->matrix[6]), 
> drm_int2fixp(pixel->b)) +
> +  drm_sm2fixp(matrix->matrix[7]);
> +
> + bf = drm_fixp_mul(drm_sm2fixp(matrix->matrix[8]), 
> drm_int2fixp(pixel->r)) +
> +  drm_fixp_mul(drm_sm2fixp(matrix->matrix[9]), 
> drm_int2fixp(pixel->g)) +
> +  drm_fixp_mul(drm_sm2fixp(matrix->matrix[10]), 
> drm_int2fixp(pixel->b)) +
> +  drm_sm2fixp(matrix->matrix[11]);
> +
> + pixel->r = drm_fixp2int(rf);
> + pixel->g = drm_fixp2int(gf);
> + pixel->b = drm_fixp2int(bf);

There is no need to round here, like done in [1]?

I don't know if the performance improvment is huge, bug maybe you can 
precompute drm_int2fixp(pixel->r/g/b)?

[1]: https://lore.kernel.org/all/20240802-yuv-v9-12-08a706669...@bootlin.com/

> +}
> +
>  static void apply_colorop(struct pixel_argb_s32 *pixel, struct drm_colorop 
> *colorop)
>  {
>   struct drm_colorop_state *colorop_state = colorop->state;
> @@ -184,6 +208,9 @@ static void apply_colorop(struct pixel_argb_s32 *pixel, 
> struct drm_colorop *colo
>   DRM_DEBUG_DRIVER("unkown colorop 1D curve type 
> %d\n", colorop_state->curve_1d_type);
>   break;
>   }
> + } else if (colorop->type == DRM_COLOROP_CTM_3X4) {
> + if (colorop_state->data)
> + apply_3x4_matrix(pixel, (struct drm_color_ctm_3x4 *) 
> colorop_state->data->data);
>   }
>