[FFmpeg-devel] [PATCH] avformat/rtpdec_av1: Fix fragment continuation check when OBU_HAS_SIZE_FIELD is set
When OBU_HAS_SIZE_FIELD is set in the OBU header, frag_obu_size remains 0.
The code used !frag_obu_size to check for unexpected fragment continuation,
which resulted in incorrect drops. Introduce expect_frag_cont to explicitly
track continuation expectation.
Signed-off-by: Parallelc
---
libavformat/rtpdec_av1.c | 11 +--
1 file changed, 9 insertions(+), 2 deletions(-)
diff --git a/libavformat/rtpdec_av1.c b/libavformat/rtpdec_av1.c
index 7cfc83b03c..87f8cd2b0b 100644
--- a/libavformat/rtpdec_av1.c
+++ b/libavformat/rtpdec_av1.c
@@ -53,6 +53,7 @@ struct PayloadContext {
unsigned int frag_pkt_leb_pos; ///< offset in buffer where OBU LEB starts
unsigned int frag_lebs_res; ///< number of bytes reserved for LEB
unsigned int frag_header_size; ///< size of OBU header (1 or 2)
+int expect_frag_cont; ///< expect fragment continuation in packet
int needs_td; ///< indicates that a TD should be output
int drop_fragment; ///< drop all fragments until next frame
int keyframe_seen; ///< keyframe was seen
@@ -165,7 +166,7 @@ static int av1_handle_packet(AVFormatContext *ctx,
PayloadContext *data,
seq, expected_seq);
goto drop_fragment;
}
-if (!pkt->size || !data->frag_obu_size) {
+if (!pkt->size || !data->expect_frag_cont) {
av_log(ctx, AV_LOG_WARNING, "Unexpected fragment continuation in
AV1 RTP packet\n");
goto drop_fragment; // avoid repeated output for the same fragment
}
@@ -187,9 +188,11 @@ static int av1_handle_packet(AVFormatContext *ctx,
PayloadContext *data,
av_log(ctx, AV_LOG_TRACE, "Timestamp changed to %u (or first pkt
%d), forcing TD\n", *timestamp, is_first_pkt);
data->needs_td = 1;
data->frag_obu_size = 0; // new temporal unit might have been
caused by dropped packets
+data->expect_frag_cont = 0;
}
-if (data->frag_obu_size) {
+if (data->expect_frag_cont) {
data->frag_obu_size = 0; // make sure we recover
+data->expect_frag_cont = 0;
av_log(ctx, AV_LOG_ERROR, "Missing fragment continuation in AV1
RTP packet\n");
return AVERROR_INVALIDDATA;
}
@@ -362,6 +365,7 @@ static int av1_handle_packet(AVFormatContext *ctx,
PayloadContext *data,
write_leb(lebptr, final_obu_size);
data->frag_obu_size = 0; // signal end of fragment
+data->expect_frag_cont = 0;
} else if (is_last_fragmented && !rem_pkt_size) {
// add to total OBU size, so we can fix that in OBU header
// (but only if the OBU size was missing!)
@@ -370,6 +374,7 @@ static int av1_handle_packet(AVFormatContext *ctx,
PayloadContext *data,
}
// fragment not yet finished!
result = -1;
+data->expect_frag_cont = 1;
}
is_frag_cont = 0;
@@ -391,6 +396,7 @@ static int av1_handle_packet(AVFormatContext *ctx,
PayloadContext *data,
if (!is_last_fragmented) {
data->frag_obu_size = 0;
data->frag_pkt_leb_pos = 0;
+data->expect_frag_cont = 0;
}
#ifdef RTPDEC_AV1_VERBOSE_TRACE
@@ -409,6 +415,7 @@ drop_fragment:
data->keyframe_seen = 0;
data->drop_fragment = 1;
data->frag_obu_size = 0;
+data->expect_frag_cont = 0;
data->needs_td = 1;
if (pkt->size) {
av_log(ctx, AV_LOG_TRACE, "Dumping current AV1 frame packet\n");
--
2.43.0
___
ffmpeg-devel mailing list
[email protected]
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
[email protected] with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH] avformat/rtpdec_av1: Fix fragment continuation check when OBU_HAS_SIZE_FIELD is set
On Mon, Apr 14, 2025 at 3:13 PM Chris Hodges wrote: > > Hi there, > > On 4/13/25 17:56, Parallelc wrote: > > > When OBU_HAS_SIZE_FIELD is set in the OBU header, frag_obu_size remains 0. > > The code used !frag_obu_size to check for unexpected fragment continuation, > > which resulted in incorrect drops. Introduce expect_frag_cont to explicitly > > track continuation expectation. > > Good find! I didn't have sources that kept the OBU size field (just curious, > what did you use to generate these RTP streams?), and your fix looks good > to me (although I didn't verify it). > > -- > Best regards, > Chris Hi Chris, Thanks for your review! I'm currently working on adding WHIP and WHEP support to FFmpeg using libdatachannel in my personal fork: https://github.com/parallelcc/FFmpeg-WHIP-WHEP So in my tests, the RTP streams are generated by libdatachannel or Chrome WebRTC. They packetize the encoder output directly, without removing the size field like rtpenc_av1 does. Best regards, Parallelc ___ ffmpeg-devel mailing list [email protected] https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email [email protected] with subject "unsubscribe".
[FFmpeg-devel] [PATCH] rtpdec_vp9: Update header parsing to RFC 9628
Signed-off-by: Parallelc
---
libavformat/rtpdec_vp9.c | 141 ---
1 file changed, 72 insertions(+), 69 deletions(-)
diff --git a/libavformat/rtpdec_vp9.c b/libavformat/rtpdec_vp9.c
index 6bbdf4847a..924065b5da 100644
--- a/libavformat/rtpdec_vp9.c
+++ b/libavformat/rtpdec_vp9.c
@@ -1,5 +1,5 @@
/*
- * RTP parser for VP9 payload format (draft version 02) - experimental
+ * RTP parser for VP9 payload format (RFC 9628) - experimental
* Copyright (c) 2015 Thomas Volkert
*
* This file is part of FFmpeg.
@@ -47,8 +47,7 @@ static int vp9_handle_packet(AVFormatContext *ctx,
PayloadContext *rtp_vp9_ctx,
{
int has_pic_id, has_layer_idc, has_ref_idc, has_ss_data;
av_unused int pic_id = 0, non_key_frame = 0, inter_picture_layer_frame;
-av_unused int layer_temporal = -1, layer_spatial = -1, layer_quality = -1;
-int ref_fields = 0, has_ref_field_ext_pic_id = 0;
+av_unused int layer_temporal = -1, layer_spatial = -1;
int first_fragment, last_fragment;
int rtp_m;
int res = 0;
@@ -68,16 +67,17 @@ static int vp9_handle_packet(AVFormatContext *ctx,
PayloadContext *rtp_vp9_ctx,
*
* 0 1 2 3 4 5 6 7
* +-+-+-+-+-+-+-+-+
- * |I|P|L|F|B|E|V|-| (REQUIRED)
+ * |I|P|L|F|B|E|V|Z| (REQUIRED)
* +-+-+-+-+-+-+-+-+
*
- * I: PictureID present
- * P: Inter-picture predicted layer frame
+ * I: Picture ID (PID) present
+ * P: Inter-picture predicted frame
* L: Layer indices present
* F: Flexible mode
- * B: Start of VP9 frame
- * E: End of picture
- * V: Scalability Structure (SS) present
+ * B: Start of Frame
+ * E: End of Frame
+ * V: Scalability Structure (SS) data present
+ * Z: Not a reference frame for upper spatial layers
*/
has_pic_id = !!(buf[0] & 0x80);
inter_picture_layer_frame = !!(buf[0] & 0x40);
@@ -89,7 +89,7 @@ static int vp9_handle_packet(AVFormatContext *ctx,
PayloadContext *rtp_vp9_ctx,
rtp_m = !!(flags & RTP_FLAG_MARKER);
-/* sanity check for markers: B should always be equal to the RTP M marker
*/
+/* sanity check for markers: E should always be equal to the RTP M marker
*/
if (last_fragment != rtp_m) {
av_log(ctx, AV_LOG_ERROR, "Invalid combination of B and M marker (%d
!= %d)\n", last_fragment, rtp_m);
return AVERROR_INVALIDDATA;
@@ -134,72 +134,70 @@ static int vp9_handle_packet(AVFormatContext *ctx,
PayloadContext *rtp_vp9_ctx,
*
* 0 1 2 3 4 5 6 7
* +-+-+-+-+-+-+-+-+
- * L:| T | S | Q | R | (CONDITIONALLY RECOMMENDED)
+ * L:| TID |U| SID |D| (Conditionally RECOMMENDED)
+ * +-+-+-+-+-+-+-+-+
+ * | TL0PICIDX | (Conditionally REQUIRED)
* +-+-+-+-+-+-+-+-+
*
- * T, S and Q are 2-bit indices for temporal, spatial, and quality
layers.
- * If "F" is set in the initial octet, R is 2 bits representing the
number
- * of reference fields this frame refers to.
+ * TID: Temporal layer ID (3 bits)
+ * U: Switching up point (1 bit)
+ * SID: Spatial layer ID (3 bits)
+ * D: Inter-layer dependency used (1 bit)
+ * TL0PICIDX: Temporal Layer 0 Picture Index (8 bits, non-flexible mode
only)
*/
if (has_layer_idc) {
if (len < 1) {
av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n");
return AVERROR_INVALIDDATA;
}
-layer_temporal = buf[0] & 0xC0;
-layer_spatial = buf[0] & 0x30;
-layer_quality = buf[0] & 0x0C;
-if (has_ref_idc) {
-ref_fields = buf[0] & 0x03;
-if (ref_fields)
-non_key_frame = 1;
-}
+layer_temporal = buf[0] >> 5;
+layer_spatial = (buf[0] >> 1) & 0x07;
buf++;
len--;
+
+if (!has_ref_idc) {
+if (len < 1) {
+av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n");
+return AVERROR_INVALIDDATA;
+}
+/* ignore TL0PICIDX */
+buf++;
+len--;
+}
}
/*
- * decode the reference fields
+ * decode reference indices
*
* 0 1 2 3 4 5 6 7
- * +-+-+-+-+-+-+-+-+ -\
- * F:| PID |X| RS| RQ| (OPTIONAL).
- * +-+-+-+-+-+-+-+-+ . - R times
- * X:| EXTENDED PID | (OPTIONAL).
- * +-+-+-+-+-+-+-+-+ -/
+ * +-+-+-+-+-+-+-+-+ -\
+ * P,F: | P_DIFF |N| (Conditionally REQUIRED)- up to 3 times
+ * +-+-+-+-+-+-+-+-+ -/
*
- * PID: The relative Picture ID referred to by t
