from:"averne via ffmpeg\-devel"

[FFmpeg-devel] [PATCH] [GSoC 25] lavc: add a shader-based Prores hwaccel (PR #20381)

2025-08-31 Thread averne via ffmpeg-devel

PR #20381 opened by averne
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20381
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20381.patch

The first few commits prepare the ground for the addition of the actual 
hwaccel, in the last commit.

Provisional benchmark:
- AMD Radeon 6700XT: 178 fps
- Intel i7 Tiger Lake: 37 fps
- NVidia Orin Nano: 70 fps

Please don't pay too much attention to these numbers; I'm planning on 
optimizing the hwaccel and submitting a follow-up series later.


>From b7435fe8a44b27155a5ad3757a49c4a2cb4b0e69 Mon Sep 17 00:00:00 2001
From: averne 
Date: Tue, 22 Jul 2025 19:06:55 +0200
Subject: [PATCH 1/5] avcodec/prores: add parser

Introduce a basic parser for ProRes frame headers.
This avoids having to decode an entire frame to
extract codec information.
---
 libavcodec/Makefile|   1 +
 libavcodec/parsers.c   |   1 +
 libavcodec/prores_parser.c | 132 +
 libavcodec/proresdec.c |   1 +
 libavformat/mov.c  |   1 +
 5 files changed, 136 insertions(+)
 create mode 100644 libavcodec/prores_parser.c

diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 3d036de4b6..51cd3db30b 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -1265,6 +1265,7 @@ OBJS-$(CONFIG_PNG_PARSER)  += png_parser.o
 OBJS-$(CONFIG_PNM_PARSER)  += pnm_parser.o pnm.o
 OBJS-$(CONFIG_PRORES_RAW_PARSER)   += prores_raw_parser.o
 OBJS-$(CONFIG_QOI_PARSER)  += qoi_parser.o
+OBJS-$(CONFIG_PRORES_PARSER)   += prores_parser.o
 OBJS-$(CONFIG_RV34_PARSER) += rv34_parser.o
 OBJS-$(CONFIG_SBC_PARSER)  += sbc_parser.o
 OBJS-$(CONFIG_SIPR_PARSER) += sipr_parser.o
diff --git a/libavcodec/parsers.c b/libavcodec/parsers.c
index b12c48f79f..c922b65ce5 100644
--- a/libavcodec/parsers.c
+++ b/libavcodec/parsers.c
@@ -66,6 +66,7 @@ extern const AVCodecParser ff_mpeg4video_parser;
 extern const AVCodecParser ff_mpegaudio_parser;
 extern const AVCodecParser ff_mpegvideo_parser;
 extern const AVCodecParser ff_opus_parser;
+extern const AVCodecParser ff_prores_parser;
 extern const AVCodecParser ff_png_parser;
 extern const AVCodecParser ff_pnm_parser;
 extern const AVCodecParser ff_prores_raw_parser;
diff --git a/libavcodec/prores_parser.c b/libavcodec/prores_parser.c
new file mode 100644
index 00..0dd0c2bc3a
--- /dev/null
+++ b/libavcodec/prores_parser.c
@@ -0,0 +1,132 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/intreadwrite.h"
+#include "bytestream.h"
+
+#include "avcodec.h"
+
+static int parse(AVCodecParserContext *s,
+ AVCodecContext *avctx,
+ const uint8_t **poutbuf, int *poutbuf_size,
+ const uint8_t *buf, int buf_size)
+{
+GetByteContext gb;
+uint8_t flags, depth, chroma_format, alpha_channel_type;
+
+/* Frame fields + frame header size */
+if (buf_size < 28)
+return buf_size;
+
+bytestream2_init(&gb, buf, buf_size);
+
+/* Frame size */
+if (bytestream2_get_be32(&gb) != buf_size)
+return buf_size;
+
+/* Frame identifier */
+if (bytestream2_get_le32(&gb) != MKTAG('i','c','p','f'))
+return buf_size;
+
+/* Frame header size */
+if (bytestream2_get_be16(&gb) < 20)
+return buf_size;
+
+bytestream2_skip(&gb, 6); /* Bitstream version, encoder identifier */
+
+switch (avctx->codec_tag) {
+case MKTAG('a','p','c','o'):
+case MKTAG('a','p','c','s'):
+case MKTAG('a','p','c','n'):
+case MKTAG('a','p','c','h'):
+depth = 10;
+break;
+case MKTAG('a','p','4','h'):
+case MKTAG('a','p','4','x'):
+depth = 12;
+break;
+default:
+return buf_size;
+}
+
+s->key_frame = 1;
+s->pict_type = AV_PICTURE_TYPE_I;
+
+s->width  = bytestream2_get_be16(&gb);
+s->height = bytestream2_get_be16(&gb);
+s->coded_width  = FFALIGN(s->width,  16);
+s->coded_height = FFALIGN(s->height, 16);
+
+flags = bytestream2_get_byte(&gb);
+
+chroma_format = flags >> 6 & 3;
+if (chroma_format < 2)
+return buf_size;
+
+/* Interlace mode */
+switch (flags >> 2 & 3) {
+case

[FFmpeg-devel] [PATCH] fate/prores: Fix missing dependency (PR #20465)

2025-09-08 Thread averne via ffmpeg-devel

PR #20465 opened by averne
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20465
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20465.patch

The gray.mov test vector contains an AAC stream.


>From d2b4004495f29a903c48e492988f6951ad0d70f9 Mon Sep 17 00:00:00 2001
From: averne 
Date: Mon, 8 Sep 2025 14:17:27 +0200
Subject: [PATCH] fate/prores: Fix missing dependency

Signed-off-by: averne 
---
 tests/fate/prores.mak | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/fate/prores.mak b/tests/fate/prores.mak
index 9e05b33b9f..612be1a061 100644
--- a/tests/fate/prores.mak
+++ b/tests/fate/prores.mak
@@ -6,7 +6,7 @@ FATE_PRORES = fate-prores-422   
\
   fate-prores-alpha_skip\
   fate-prores-transparency  \
   fate-prores-transparency_skip \
-  $(if $(CONFIG_ARESAMPLE_FILTER),fate-prores-gray) \
+  $(if $(call ALLYES, ARESAMPLE_FILTER 
AAC_FIXED_DECODER),fate-prores-gray) \
 
 FATE_SAMPLES_FFMPEG-$(call FRAMECRC, MOV, PRORES, SCALE_FILTER) += 
$(FATE_PRORES)
 fate-prores: $(FATE_PRORES)
-- 
2.49.1

___
ffmpeg-devel mailing list -- [email protected]
To unsubscribe send an email to [email protected]

[FFmpeg-devel] [PATCH] vulkan/prores: output LSB-padded data (PR #20755)

2025-10-26 Thread averne via ffmpeg-devel

PR #20755 opened by averne
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20755
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20755.patch

As discussed on #ffmpeg-devel, this fixes video playback on mpv using the 
gpu-next VO.
Breaks frame hwdownload, pending the introduction and wiring up of MSB pixfmts.


>From 68ab3fb5b4a4512799e7e346c1335e53795994fe Mon Sep 17 00:00:00 2001
From: averne 
Date: Sun, 26 Oct 2025 22:05:07 +0100
Subject: [PATCH] vulkan/prores: output LSB-padded data

For consistency with existing Vulkan-based hwaccels
---
 libavcodec/vulkan/prores_idct.comp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libavcodec/vulkan/prores_idct.comp 
b/libavcodec/vulkan/prores_idct.comp
index 645cb02979..642fcb5bd5 100644
--- a/libavcodec/vulkan/prores_idct.comp
+++ b/libavcodec/vulkan/prores_idct.comp
@@ -110,14 +110,14 @@ void main(void)
 idct(block, idx, 9);
 
 float fact = 1.0f / (1 << (12 - depth)), off = 1 << (depth - 1);
-int maxv = (1 << depth) - 1;
+int maxv = (1 << depth) - 1, shift = 16 - depth;
 
 /* 7.5.1 Color Component Samples. Rescale, clamp and write back to global 
memory */
 barrier();
 if (act) {
 [[unroll]] for (uint i = 0; i < 8; ++i) {
 float v = blocks[block][i * 9 + idx] * fact + off;
-put_px(comp, ivec2(gid.x, (gid.y << 3) | i), clamp(int(v), 0, 
maxv));
+put_px(comp, ivec2(gid.x, (gid.y << 3) | i), clamp(int(v), 0, 
maxv) << shift);
 }
 }
 }
-- 
2.49.1

___
ffmpeg-devel mailing list -- [email protected]
To unsubscribe send an email to [email protected]

[FFmpeg-devel] [PATCH] [GSoC 25] avcodec/prores: add parser (PR #20752)

2025-10-25 Thread averne via ffmpeg-devel

PR #20752 opened by averne
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20752
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20752.patch

Split off from #20381.
Adds a ProRes parser, which avoids going through a full-frame decode to parse 
headers.


>From 46f936c70e15e37f6aee5621edbad0951901a800 Mon Sep 17 00:00:00 2001
From: averne 
Date: Tue, 22 Jul 2025 19:06:55 +0200
Subject: [PATCH] avcodec/prores: add parser

Introduce a basic parser for ProRes frame headers.
This avoids having to decode an entire frame to
extract codec information.
---
 libavcodec/Makefile|   1 +
 libavcodec/parsers.c   |   1 +
 libavcodec/prores_parser.c | 128 +
 libavcodec/proresdec.c |  12 ++--
 libavformat/mov.c  |   1 +
 5 files changed, 139 insertions(+), 4 deletions(-)
 create mode 100644 libavcodec/prores_parser.c

diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 3d036de4b6..51cd3db30b 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -1265,6 +1265,7 @@ OBJS-$(CONFIG_PNG_PARSER)  += png_parser.o
 OBJS-$(CONFIG_PNM_PARSER)  += pnm_parser.o pnm.o
 OBJS-$(CONFIG_PRORES_RAW_PARSER)   += prores_raw_parser.o
 OBJS-$(CONFIG_QOI_PARSER)  += qoi_parser.o
+OBJS-$(CONFIG_PRORES_PARSER)   += prores_parser.o
 OBJS-$(CONFIG_RV34_PARSER) += rv34_parser.o
 OBJS-$(CONFIG_SBC_PARSER)  += sbc_parser.o
 OBJS-$(CONFIG_SIPR_PARSER) += sipr_parser.o
diff --git a/libavcodec/parsers.c b/libavcodec/parsers.c
index b12c48f79f..c922b65ce5 100644
--- a/libavcodec/parsers.c
+++ b/libavcodec/parsers.c
@@ -66,6 +66,7 @@ extern const AVCodecParser ff_mpeg4video_parser;
 extern const AVCodecParser ff_mpegaudio_parser;
 extern const AVCodecParser ff_mpegvideo_parser;
 extern const AVCodecParser ff_opus_parser;
+extern const AVCodecParser ff_prores_parser;
 extern const AVCodecParser ff_png_parser;
 extern const AVCodecParser ff_pnm_parser;
 extern const AVCodecParser ff_prores_raw_parser;
diff --git a/libavcodec/prores_parser.c b/libavcodec/prores_parser.c
new file mode 100644
index 00..d778f839bd
--- /dev/null
+++ b/libavcodec/prores_parser.c
@@ -0,0 +1,128 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/intreadwrite.h"
+#include "bytestream.h"
+
+#include "avcodec.h"
+
+static int parse(AVCodecParserContext *s,
+ AVCodecContext *avctx,
+ const uint8_t **poutbuf, int *poutbuf_size,
+ const uint8_t *buf, int buf_size)
+{
+GetByteContext gb;
+uint8_t flags, depth, chroma_format, alpha_channel_type;
+
+*poutbuf  = buf;
+*poutbuf_size = buf_size;
+
+/* Frame fields + frame header size */
+if (buf_size < 28)
+return buf_size;
+
+bytestream2_init(&gb, buf, buf_size);
+
+/* Frame size */
+if (bytestream2_get_be32(&gb) != buf_size)
+return buf_size;
+
+/* Frame identifier */
+if (bytestream2_get_le32(&gb) != MKTAG('i','c','p','f'))
+return buf_size;
+
+/* Frame header size */
+if (bytestream2_get_be16(&gb) < 20)
+return buf_size;
+
+bytestream2_skip(&gb, 6); /* Bitstream version, encoder identifier */
+
+s->key_frame = 1;
+s->pict_type = AV_PICTURE_TYPE_I;
+
+s->width  = bytestream2_get_be16(&gb);
+s->height = bytestream2_get_be16(&gb);
+s->coded_width  = FFALIGN(s->width,  16);
+s->coded_height = FFALIGN(s->height, 16);
+
+flags = bytestream2_get_byte(&gb);
+
+/* Interlace mode */
+switch (flags >> 2 & 3) {
+case 0:
+s->field_order   = AV_FIELD_PROGRESSIVE;
+s->picture_structure = AV_PICTURE_STRUCTURE_FRAME;
+break;
+case 1:
+s->field_order   = AV_FIELD_TT;
+s->picture_structure = AV_PICTURE_STRUCTURE_TOP_FIELD;
+break;
+case 2:
+s->field_order   = AV_FIELD_BB;
+s->picture_structure = AV_PICTURE_STRUCTURE_BOTTOM_FIELD;
+break;
+default:
+break;
+}
+
+bytestream2_skip(&gb, 4); /* Aspect ratio information, frame rate code, 
color primaries, transfer characteristic, matrix coefficients */
+
+/*

[FFmpeg-devel] [PATCH] vulkan/prores: Adopt the same IDCT routine as the prores-raw hwaccel (PR #20819)

2025-11-02 Thread averne via ffmpeg-devel

PR #20819 opened by averne
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20819
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20819.patch

The added rounding at the final output conforms
to the SMPTE document and reduces the deviation
against the software decoder.


>From 7639b6fd0cec3e7ae31f1d0c2d1fc491dbd937e5 Mon Sep 17 00:00:00 2001
From: averne 
Date: Sun, 2 Nov 2025 20:23:28 +0100
Subject: [PATCH] vulkan/prores: Adopt the same IDCT routine as the prores-raw
 hwaccel

The added rounding at the final output conforms
to the SMPTE document and reduces the deviation
against the software decoder.
---
 libavcodec/vulkan/prores_idct.comp | 105 +++--
 1 file changed, 68 insertions(+), 37 deletions(-)

diff --git a/libavcodec/vulkan/prores_idct.comp 
b/libavcodec/vulkan/prores_idct.comp
index 642fcb5bd5..8ad3b7f58b 100644
--- a/libavcodec/vulkan/prores_idct.comp
+++ b/libavcodec/vulkan/prores_idct.comp
@@ -37,47 +37,77 @@ void put_px(uint tex_idx, ivec2 pos, uint v)
 #endif
 }
 
+const float idct_8x8_scales[] = {
+0.353553390593274f, // cos(4 * pi/16) / 2
+0.490392640201615f, // cos(1 * pi/16) / 2
+0.461939766255643f, // cos(2 * pi/16) / 2
+0.415734806151273f, // cos(3 * pi/16) / 2
+0.353553390593274f, // cos(4 * pi/16) / 2
+0.277785116509801f, // cos(5 * pi/16) / 2
+0.191341716182545f, // cos(6 * pi/16) / 2
+0.097545161008064f, // cos(7 * pi/16) / 2
+};
+
 /* 7.4 Inverse Transform */
 void idct(uint block, uint offset, uint stride)
 {
-float c0 = blocks[block][0*stride + offset];
-float c1 = blocks[block][1*stride + offset];
-float c2 = blocks[block][2*stride + offset];
-float c3 = blocks[block][3*stride + offset];
-float c4 = blocks[block][4*stride + offset];
-float c5 = blocks[block][5*stride + offset];
-float c6 = blocks[block][6*stride + offset];
-float c7 = blocks[block][7*stride + offset];
+float t0, t1, t2, t3, t4, t5, t6, t7, u8;
+float u0, u1, u2, u3, u4, u5, u6, u7;
 
-float tmp1 = c6 * 1.4142134189605712891 + (c2 - c6);
-float tmp2 = c6 * 1.4142134189605712891 - (c2 - c6);
+/* Input */
+t0 = blocks[block][0*stride + offset];
+u4 = blocks[block][1*stride + offset];
+t2 = blocks[block][2*stride + offset];
+u6 = blocks[block][3*stride + offset];
+t1 = blocks[block][4*stride + offset];
+u5 = blocks[block][5*stride + offset];
+t3 = blocks[block][6*stride + offset];
+u7 = blocks[block][7*stride + offset];
 
-float a1 = (c0 + c4) * 0.35355341434478759766 + tmp1 * 
0.46193981170654296875;
-float a4 = (c0 + c4) * 0.35355341434478759766 - tmp1 * 
0.46193981170654296875;
+/* Embedded scaled inverse 4-point Type-II DCT */
+u0 = t0 + t1;
+u1 = t0 - t1;
+u3 = t2 + t3;
+u2 = (t2 - t3)*(1.4142135623730950488016887242097f) - u3;
+t0 = u0 + u3;
+t3 = u0 - u3;
+t1 = u1 + u2;
+t2 = u1 - u2;
 
-float a3 = (c0 - c4) * 0.35355341434478759766 + tmp2 * 
0.19134169816970825195;
-float a2 = (c0 - c4) * 0.35355341434478759766 - tmp2 * 
0.19134169816970825195;
+/* Embedded scaled inverse 4-point Type-IV DST */
+t5 = u5 + u6;
+t6 = u5 - u6;
+t7 = u4 + u7;
+t4 = u4 - u7;
+u7 = t7 + t5;
+u5 = (t7 - t5)*(1.4142135623730950488016887242097f);
+u8 = (t4 + t6)*(1.8477590650225735122563663787936f);
+u4 = u8 - t4*(1.0823922002923939687994464107328f);
+u6 = u8 - t6*(2.6131259297527530557132863468544f);
+t7 = u7;
+t6 = t7 - u6;
+t5 = t6 + u5;
+t4 = t5 - u4;
 
-float tmp3 = (c3 - c5) * 0.70710682868957519531 + c7;
-float tmp4 = (c3 - c5) * 0.70710682868957519531 - c7;
+/* Butterflies */
+u0 = t0 + t7;
+u7 = t0 - t7;
+u6 = t1 + t6;
+u1 = t1 - t6;
+u2 = t2 + t5;
+u5 = t2 - t5;
+u4 = t3 + t4;
+u3 = t3 - t4;
 
-float tmp5 = (c5 - c7) *  1.4142134189605712891 + (c5 - c7) + (c1 - c3);
-float tmp6 = (c5 - c7) * -1.4142134189605712891 + (c5 - c7) + (c1 - c3);
-
-float m1 = tmp3 *  2.6131260395050048828 + tmp5;
-float m4 = tmp3 * -2.6131260395050048828 + tmp5;
-
-float m2 = tmp4 *  1.0823919773101806641 + tmp6;
-float m3 = tmp4 * -1.0823919773101806641 + tmp6;
-
-blocks[block][0*stride + offset] = m1 *  0.49039259552955627441  + a1;
-blocks[block][7*stride + offset] = m1 * -0.49039259552955627441  + a1;
-blocks[block][1*stride + offset] = m2 *  0.41573479771614074707  + a2;
-blocks[block][6*stride + offset] = m2 * -0.41573479771614074707  + a2;
-blocks[block][2*stride + offset] = m3 *  0.27778509259223937988  + a3;
-blocks[block][5*stride + offset] = m3 * -0.27778509259223937988  + a3;
-blocks[block][3*stride + offset] = m4 *  0.097545139491558074951 + a4;
-blocks[block][4*stride + offset] = m4 * -0.097545139491558074951 + a4;
+/* Output */
+blocks[block][0*stride + offset] = u0;
+blocks[block][1*stride + offset] = u1;
+blocks[block][2*stride + offset] = u2;
+blocks[block][3*s

[FFmpeg-devel] [PATCH] vulkan/prores: forward quantization parameter to the IDCT shader (PR #20870)

2025-11-08 Thread averne via ffmpeg-devel

PR #20870 opened by averne
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20870
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20870.patch

The qScale syntax element has a maximum value of 512, which would overflow the 
16-bit store from the VLD shader in extreme cases.
This fixes that edge case by forwarding the element in a storage buffer, and 
applying the inverse quantization entirely in the IDCT shader.


>From 71204cc83765f4715b06a345627b320c5d5e0e70 Mon Sep 17 00:00:00 2001
From: averne 
Date: Sat, 8 Nov 2025 19:57:37 +0100
Subject: [PATCH] vulkan/prores: forward quantization parameter to the IDCT
 shader

The qScale syntax element has a maximum value of 512, which would overflow the 
16-bit store from the VLD shader in extreme cases.
This fixes that edge case by forwarding the element in a storage buffer, and 
applying the inverse quantization fully in the IDCT shader.
---
 libavcodec/vulkan/prores_idct.comp |  14 ++--
 libavcodec/vulkan/prores_vld.comp  |  28 
 libavcodec/vulkan_prores.c | 103 -
 3 files changed, 111 insertions(+), 34 deletions(-)

diff --git a/libavcodec/vulkan/prores_idct.comp 
b/libavcodec/vulkan/prores_idct.comp
index 645cb02979..f3469589e0 100644
--- a/libavcodec/vulkan/prores_idct.comp
+++ b/libavcodec/vulkan/prores_idct.comp
@@ -87,17 +87,23 @@ void main(void)
 uint chroma_shift = comp != 0 ? log2_chroma_w : 0;
 bool act = gid.x < mb_width << (4 - chroma_shift);
 
-/* Coalesced load of DCT coeffs in shared memory, second part of inverse 
quantization */
+/* Coalesced load of DCT coeffs in shared memory, inverse quantization */
 if (act) {
+MbParams p = mb_params[(gid.y >> 1) * mb_width + (gid.x >> 4)];
+
 /**
  * According to spec indexing an array in push constant memory with
  * a non-dynamically uniform value is illegal ($15.9.1 in v1.4.326),
  * so copy the whole matrix locally.
  */
 uint8_t[64] qmat = comp == 0 ? qmat_luma : qmat_chroma;
+
+/* Table 15 */
+int qscale = p.quant_idx > 128 ? (p.quant_idx - 96) << 2 : p.quant_idx;
+
 [[unroll]] for (uint i = 0; i < 8; ++i) {
-int v = sign_extend(int(get_px(comp, ivec2(gid.x, (gid.y << 3) | 
i))), 16);
-blocks[block][i * 9 + idx] = float(v * int(qmat[(i << 3) + idx]));
+int v = sign_extend(int(get_px(comp, ivec2(gid.x, (gid.y << 3) + 
i))), 16);
+blocks[block][i * 9 + idx] = float(v * qscale * int(qmat[(i << 3) 
+ idx]));
 }
 }
 
@@ -117,7 +123,7 @@ void main(void)
 if (act) {
 [[unroll]] for (uint i = 0; i < 8; ++i) {
 float v = blocks[block][i * 9 + idx] * fact + off;
-put_px(comp, ivec2(gid.x, (gid.y << 3) | i), clamp(int(v), 0, 
maxv));
+put_px(comp, ivec2(gid.x, (gid.y << 3) + i), clamp(int(v), 0, 
maxv));
 }
 }
 }
diff --git a/libavcodec/vulkan/prores_vld.comp 
b/libavcodec/vulkan/prores_vld.comp
index 00e78e08ff..258604fb36 100644
--- a/libavcodec/vulkan/prores_vld.comp
+++ b/libavcodec/vulkan/prores_vld.comp
@@ -57,7 +57,7 @@ uint decode_codeword(inout GetBitContext gb, int codebook)
 }
 }
 
-void decode_comp(in GetBitContext gb, uvec2 mb_pos, uint mb_count, uint qscale)
+void decode_comp(in GetBitContext gb, uvec2 mb_pos, uint mb_count)
 {
 uvec3 gid = gl_GlobalInvocationID;
 uint is_luma = uint(gid.z == 0);
@@ -70,7 +70,7 @@ void decode_comp(in GetBitContext gb, uvec2 mb_pos, uint 
mb_count, uint qscale)
 {
 /* First coeff */
 uint c = to_signed(decode_codeword(gb, 0x650));
-put_px(gid.z, base_pos, c * qscale & 0x);
+put_px(gid.z, base_pos, c & 0x);
 
 /**
  * Table 9, encoded as (last_rice_q << 0) | (krice or kexp << 4) | 
((kexp or kexp + 1) << 8)
@@ -89,7 +89,7 @@ void decode_comp(in GetBitContext gb, uvec2 mb_pos, uint 
mb_count, uint qscale)
 int s = int(prev_dc_diff) >> 31;
 c += prev_dc_diff = (to_signed(cw) ^ s) - s;
 
-put_px(gid.z, base_pos + pos_to_block(i, is_luma), c * qscale & 
0x);
+put_px(gid.z, base_pos + pos_to_block(i, is_luma), c & 0x);
 }
 }
 
@@ -152,7 +152,7 @@ void decode_comp(in GetBitContext gb, uvec2 mb_pos, uint 
mb_count, uint qscale)
 ivec2 bpos = ivec2(scan & 0xf, scan >> 4);
 
 uint c = ((level + 1) ^ -s) + s;
-put_px(gid.z, base_pos + spos + bpos, c * qscale & 0x);
+put_px(gid.z, base_pos + spos + bpos, c & 0x);
 }
 }
 }
@@ -235,13 +235,8 @@ void main(void)
 u8buf bs = u8buf(slice_data + slice_off);
 
 /* Decode slice header */
-uint hdr_size, y_size, u_size, v_size, a_size;
-hdr_size = bs[0].v >> 3;
-
-/* Table 15 */
-uint qidx   = clamp(bs[1].v, 1, 224),
- qscale = qidx > 128 ? (qidx - 96) << 2 : qidx;
-
+uint hdr_size, qidx, y_size, u_size, v_size, a_size;
+hdr_size

[FFmpeg-devel] [PATCH] [GSoC 25] lavc: add a shader-based Prores hwaccel (PR #20381)

[FFmpeg-devel] [PATCH] fate/prores: Fix missing dependency (PR #20465)

[FFmpeg-devel] [PATCH] vulkan/prores: output LSB-padded data (PR #20755)

[FFmpeg-devel] [PATCH] [GSoC 25] avcodec/prores: add parser (PR #20752)

[FFmpeg-devel] [PATCH] vulkan/prores: Adopt the same IDCT routine as the prores-raw hwaccel (PR #20819)

[FFmpeg-devel] [PATCH] vulkan/prores: forward quantization parameter to the IDCT shader (PR #20870)

6 matches

Site Navigation

Mail list logo

Footer information