[FFmpeg-devel] [PATCH] avformat/rtpdec_av1: Fix fragment continuation check when OBU_HAS_SIZE_FIELD is set

2025-04-13 Thread Parallelc
When OBU_HAS_SIZE_FIELD is set in the OBU header, frag_obu_size remains 0.
The code used !frag_obu_size to check for unexpected fragment continuation,
which resulted in incorrect drops. Introduce expect_frag_cont to explicitly
track continuation expectation.

Signed-off-by: Parallelc 
---
 libavformat/rtpdec_av1.c | 11 +--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/libavformat/rtpdec_av1.c b/libavformat/rtpdec_av1.c
index 7cfc83b03c..87f8cd2b0b 100644
--- a/libavformat/rtpdec_av1.c
+++ b/libavformat/rtpdec_av1.c
@@ -53,6 +53,7 @@ struct PayloadContext {
 unsigned int frag_pkt_leb_pos;  ///< offset in buffer where OBU LEB starts
 unsigned int frag_lebs_res; ///< number of bytes reserved for LEB
 unsigned int frag_header_size;  ///< size of OBU header (1 or 2)
+int expect_frag_cont;   ///< expect fragment continuation in packet
 int needs_td;   ///< indicates that a TD should be output
 int drop_fragment;  ///< drop all fragments until next frame
 int keyframe_seen;  ///< keyframe was seen
@@ -165,7 +166,7 @@ static int av1_handle_packet(AVFormatContext *ctx, 
PayloadContext *data,
seq, expected_seq);
 goto drop_fragment;
 }
-if (!pkt->size || !data->frag_obu_size) {
+if (!pkt->size || !data->expect_frag_cont) {
 av_log(ctx, AV_LOG_WARNING, "Unexpected fragment continuation in 
AV1 RTP packet\n");
 goto drop_fragment; // avoid repeated output for the same fragment
 }
@@ -187,9 +188,11 @@ static int av1_handle_packet(AVFormatContext *ctx, 
PayloadContext *data,
 av_log(ctx, AV_LOG_TRACE, "Timestamp changed to %u (or first pkt 
%d), forcing TD\n", *timestamp, is_first_pkt);
 data->needs_td = 1;
 data->frag_obu_size = 0; // new temporal unit might have been 
caused by dropped packets
+data->expect_frag_cont = 0;
 }
-if (data->frag_obu_size) {
+if (data->expect_frag_cont) {
 data->frag_obu_size = 0; // make sure we recover
+data->expect_frag_cont = 0;
 av_log(ctx, AV_LOG_ERROR, "Missing fragment continuation in AV1 
RTP packet\n");
 return AVERROR_INVALIDDATA;
 }
@@ -362,6 +365,7 @@ static int av1_handle_packet(AVFormatContext *ctx, 
PayloadContext *data,
 write_leb(lebptr, final_obu_size);
 
 data->frag_obu_size = 0; // signal end of fragment
+data->expect_frag_cont = 0;
 } else if (is_last_fragmented && !rem_pkt_size) {
 // add to total OBU size, so we can fix that in OBU header
 // (but only if the OBU size was missing!)
@@ -370,6 +374,7 @@ static int av1_handle_packet(AVFormatContext *ctx, 
PayloadContext *data,
 }
 // fragment not yet finished!
 result = -1;
+data->expect_frag_cont = 1;
 }
 is_frag_cont = 0;
 
@@ -391,6 +396,7 @@ static int av1_handle_packet(AVFormatContext *ctx, 
PayloadContext *data,
 if (!is_last_fragmented) {
 data->frag_obu_size = 0;
 data->frag_pkt_leb_pos = 0;
+data->expect_frag_cont = 0;
 }
 
 #ifdef RTPDEC_AV1_VERBOSE_TRACE
@@ -409,6 +415,7 @@ drop_fragment:
 data->keyframe_seen = 0;
 data->drop_fragment = 1;
 data->frag_obu_size = 0;
+data->expect_frag_cont = 0;
 data->needs_td = 1;
 if (pkt->size) {
 av_log(ctx, AV_LOG_TRACE, "Dumping current AV1 frame packet\n");
-- 
2.43.0

___
ffmpeg-devel mailing list
[email protected]
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
[email protected] with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH] avformat/rtpdec_av1: Fix fragment continuation check when OBU_HAS_SIZE_FIELD is set

2025-04-14 Thread Parallelc
On Mon, Apr 14, 2025 at 3:13 PM Chris Hodges  wrote:
>
> Hi there,
>
> On 4/13/25 17:56, Parallelc wrote:
>
> > When OBU_HAS_SIZE_FIELD is set in the OBU header, frag_obu_size remains 0.
> > The code used !frag_obu_size to check for unexpected fragment continuation,
> > which resulted in incorrect drops. Introduce expect_frag_cont to explicitly
> > track continuation expectation.
>
> Good find! I didn't have sources that kept the OBU size field (just curious,
> what did you use to generate these RTP streams?), and your fix looks good
> to me (although I didn't verify it).
>
> --
> Best regards,
> Chris

Hi Chris,

Thanks for your review!

I'm currently working on adding WHIP and WHEP support to FFmpeg using
libdatachannel
in my personal fork: https://github.com/parallelcc/FFmpeg-WHIP-WHEP

So in my tests, the RTP streams are generated by libdatachannel or
Chrome WebRTC. They
packetize the encoder output directly, without removing the size field
like rtpenc_av1 does.

Best regards,
Parallelc
___
ffmpeg-devel mailing list
[email protected]
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
[email protected] with subject "unsubscribe".


[FFmpeg-devel] [PATCH] rtpdec_vp9: Update header parsing to RFC 9628

2025-04-19 Thread Parallelc
Signed-off-by: Parallelc 
---
 libavformat/rtpdec_vp9.c | 141 ---
 1 file changed, 72 insertions(+), 69 deletions(-)

diff --git a/libavformat/rtpdec_vp9.c b/libavformat/rtpdec_vp9.c
index 6bbdf4847a..924065b5da 100644
--- a/libavformat/rtpdec_vp9.c
+++ b/libavformat/rtpdec_vp9.c
@@ -1,5 +1,5 @@
 /*
- * RTP parser for VP9 payload format (draft version 02) - experimental
+ * RTP parser for VP9 payload format (RFC 9628) - experimental
  * Copyright (c) 2015 Thomas Volkert 
  *
  * This file is part of FFmpeg.
@@ -47,8 +47,7 @@ static int vp9_handle_packet(AVFormatContext *ctx, 
PayloadContext *rtp_vp9_ctx,
 {
 int has_pic_id, has_layer_idc, has_ref_idc, has_ss_data;
 av_unused int pic_id = 0, non_key_frame = 0, inter_picture_layer_frame;
-av_unused int layer_temporal = -1, layer_spatial = -1, layer_quality = -1;
-int ref_fields = 0, has_ref_field_ext_pic_id = 0;
+av_unused int layer_temporal = -1, layer_spatial = -1;
 int first_fragment, last_fragment;
 int rtp_m;
 int res = 0;
@@ -68,16 +67,17 @@ static int vp9_handle_packet(AVFormatContext *ctx, 
PayloadContext *rtp_vp9_ctx,
  *
  *  0 1 2 3 4 5 6 7
  * +-+-+-+-+-+-+-+-+
- * |I|P|L|F|B|E|V|-| (REQUIRED)
+ * |I|P|L|F|B|E|V|Z| (REQUIRED)
  * +-+-+-+-+-+-+-+-+
  *
- * I: PictureID present
- * P: Inter-picture predicted layer frame
+ * I: Picture ID (PID) present
+ * P: Inter-picture predicted frame
  * L: Layer indices present
  * F: Flexible mode
- * B: Start of VP9 frame
- * E: End of picture
- * V: Scalability Structure (SS) present
+ * B: Start of Frame
+ * E: End of Frame
+ * V: Scalability Structure (SS) data present
+ * Z: Not a reference frame for upper spatial layers
  */
 has_pic_id = !!(buf[0] & 0x80);
 inter_picture_layer_frame = !!(buf[0] & 0x40);
@@ -89,7 +89,7 @@ static int vp9_handle_packet(AVFormatContext *ctx, 
PayloadContext *rtp_vp9_ctx,
 
 rtp_m = !!(flags & RTP_FLAG_MARKER);
 
-/* sanity check for markers: B should always be equal to the RTP M marker 
*/
+/* sanity check for markers: E should always be equal to the RTP M marker 
*/
 if (last_fragment != rtp_m) {
 av_log(ctx, AV_LOG_ERROR, "Invalid combination of B and M marker (%d 
!= %d)\n", last_fragment, rtp_m);
 return AVERROR_INVALIDDATA;
@@ -134,72 +134,70 @@ static int vp9_handle_packet(AVFormatContext *ctx, 
PayloadContext *rtp_vp9_ctx,
  *
  *  0 1 2 3 4 5 6 7
  * +-+-+-+-+-+-+-+-+
- *   L:| T | S | Q | R | (CONDITIONALLY RECOMMENDED)
+ *   L:| TID |U| SID |D| (Conditionally RECOMMENDED)
+ * +-+-+-+-+-+-+-+-+
+ * |   TL0PICIDX   | (Conditionally REQUIRED)
  * +-+-+-+-+-+-+-+-+
  *
- *   T, S and Q are 2-bit indices for temporal, spatial, and quality 
layers.
- *   If "F" is set in the initial octet, R is 2 bits representing the 
number
- *   of reference fields this frame refers to.
+ *   TID: Temporal layer ID (3 bits)
+ *   U: Switching up point (1 bit)
+ *   SID: Spatial layer ID (3 bits)
+ *   D: Inter-layer dependency used (1 bit)
+ *   TL0PICIDX: Temporal Layer 0 Picture Index (8 bits, non-flexible mode 
only)
  */
 if (has_layer_idc) {
 if (len < 1) {
 av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n");
 return AVERROR_INVALIDDATA;
 }
-layer_temporal = buf[0] & 0xC0;
-layer_spatial  = buf[0] & 0x30;
-layer_quality  = buf[0] & 0x0C;
-if (has_ref_idc) {
-ref_fields = buf[0] & 0x03;
-if (ref_fields)
-non_key_frame = 1;
-}
+layer_temporal = buf[0] >> 5;
+layer_spatial  = (buf[0] >> 1) & 0x07;
 buf++;
 len--;
+
+if (!has_ref_idc) {
+if (len < 1) {
+av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n");
+return AVERROR_INVALIDDATA;
+}
+/* ignore TL0PICIDX */
+buf++;
+len--;
+}
 }
 
 /*
- * decode the reference fields
+ * decode reference indices
  *
  *  0 1 2 3 4 5 6 7
- * +-+-+-+-+-+-+-+-+  -\
- *   F:| PID |X| RS| RQ| (OPTIONAL).
- * +-+-+-+-+-+-+-+-+   . - R times
- *   X:| EXTENDED PID  | (OPTIONAL).
- * +-+-+-+-+-+-+-+-+  -/
+ * +-+-+-+-+-+-+-+-+ -\
+ *   P,F:  | P_DIFF  |N| (Conditionally REQUIRED)- up to 3 times
+ * +-+-+-+-+-+-+-+-+ -/
  *
- *   PID:  The relative Picture ID referred to by t