PR #21662 opened by ngaullier URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/21662 Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/21662.patch
Typical use case example: https://samples.ffmpeg.org/A-codecs/MP3/short.mups This sample has 2M of ID3v2 data whereas the default probe size is 1M. It is still supported as the decision is currently to simply default to mp3 in such a case. So here is a proposal to make this cleaner and future-proof (e.g. AAC support). Tagged as WIP at the moment since the third commit should certainly include a minor version upgrade. >From 5ccecd1716f4b5ef7b65f594d8d6815794df11ff Mon Sep 17 00:00:00 2001 From: Nicolas Gaullier <[email protected]> Date: Thu, 5 Feb 2026 16:29:12 +0100 Subject: [PATCH 1/4] avformat: clarify formatprobesize AVOption max_probe_size defaults to PROBE_BUF_MAX in av_probe_input_buffer2(). The current code also sets the AVOption to PROBE_BUF_MAX by default, but it is not documented, so better set it to 0 (auto): it is more flexible and will be useful in the following commits. Fix missing documentation. Signed-off-by: Nicolas Gaullier <[email protected]> --- doc/formats.texi | 9 +++++++++ libavformat/options_table.h | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/doc/formats.texi b/doc/formats.texi index 876a9e92b3..d11b5ca9fe 100644 --- a/doc/formats.texi +++ b/doc/formats.texi @@ -21,6 +21,15 @@ Possible values: Reduce buffering. @end table +@item formatprobesize @var{unsigned int} (@emph{input}) +Maximum number of bytes read from input in order to identify the +input format. Only used when the format is not set +explicitly by the caller. +A limited value may result in a low probe score, misdetection, or +no detection at all. +Must be an integer not less than 2048, or 0 (auto). +Default value is 0. + @item probesize @var{integer} (@emph{input}) Set probing size in bytes, i.e. the size of the data to analyze to get stream information. 
A higher value will enable detecting more diff --git a/libavformat/options_table.h b/libavformat/options_table.h index 5047b5ce50..7cef593508 100644 --- a/libavformat/options_table.h +++ b/libavformat/options_table.h @@ -37,7 +37,7 @@ static const AVOption avformat_options[] = { {"avioflags", NULL, OFFSET(avio_flags), AV_OPT_TYPE_FLAGS, {.i64 = DEFAULT }, INT_MIN, INT_MAX, D|E, .unit = "avioflags"}, {"direct", "reduce buffering", 0, AV_OPT_TYPE_CONST, {.i64 = AVIO_FLAG_DIRECT }, INT_MIN, INT_MAX, D|E, .unit = "avioflags"}, {"probesize", "set probing size", OFFSET(probesize), AV_OPT_TYPE_INT64, {.i64 = 5000000 }, 32, (double)INT64_MAX, D}, -{"formatprobesize", "number of bytes to probe file format", OFFSET(format_probesize), AV_OPT_TYPE_INT, {.i64 = PROBE_BUF_MAX}, 0, INT_MAX-1, D}, +{"formatprobesize", "number of bytes to probe file format", OFFSET(format_probesize), AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX-1, D}, {"packetsize", "set packet size", OFFSET(packet_size), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, 0, INT_MAX, E}, {"fflags", NULL, OFFSET(flags), AV_OPT_TYPE_FLAGS, {.i64 = AVFMT_FLAG_AUTO_BSF }, INT_MIN, INT_MAX, D|E, .unit = "fflags"}, {"flush_packets", "reduce the latency by flushing out packets immediately", 0, AV_OPT_TYPE_CONST, {.i64 = AVFMT_FLAG_FLUSH_PACKETS }, INT_MIN, INT_MAX, E, .unit = "fflags"}, -- 2.52.0 >From 9daa7293114a7415bdc88e1905cfea3ada7642d1 Mon Sep 17 00:00:00 2001 From: Nicolas Gaullier <[email protected]> Date: Thu, 5 Feb 2026 17:10:39 +0100 Subject: [PATCH 2/4] avformat: rename PROBE_BUF_MAX to PROBE_BUF_DEFAULT Signed-off-by: Nicolas Gaullier <[email protected]> --- libavformat/format.c | 4 ++-- libavformat/internal.h | 2 +- libavformat/mp3dec.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/libavformat/format.c b/libavformat/format.c index 83aa980a15..5c790b16f3 100644 --- a/libavformat/format.c +++ b/libavformat/format.c @@ -179,7 +179,7 @@ const AVInputFormat *av_probe_input_format3(const AVProbeData *pd, nodat 
= ID3_ALMOST_GREATER_PROBE; lpd.buf += id3len; lpd.buf_size -= id3len; - } else if (id3len >= PROBE_BUF_MAX) { + } else if (id3len >= PROBE_BUF_DEFAULT) { nodat = ID3_GREATER_MAX_PROBE; break; } else { @@ -265,7 +265,7 @@ int av_probe_input_buffer2(AVIOContext *pb, const AVInputFormat **fmt, int eof = 0; if (!max_probe_size) - max_probe_size = PROBE_BUF_MAX; + max_probe_size = PROBE_BUF_DEFAULT; else if (max_probe_size < PROBE_BUF_MIN) { av_log(logctx, AV_LOG_ERROR, "Specified probe size value %u cannot be < %u\n", max_probe_size, PROBE_BUF_MIN); diff --git a/libavformat/internal.h b/libavformat/internal.h index 64452cce6e..ebb7f94927 100644 --- a/libavformat/internal.h +++ b/libavformat/internal.h @@ -31,7 +31,7 @@ /** size of probe buffer, for guessing file type from file contents */ #define PROBE_BUF_MIN 2048 -#define PROBE_BUF_MAX (1 << 20) +#define PROBE_BUF_DEFAULT (1 << 20) #ifdef DEBUG # define hex_dump_debug(class, buf, size) av_hex_dump_log(class, AV_LOG_DEBUG, buf, size) diff --git a/libavformat/mp3dec.c b/libavformat/mp3dec.c index 5b153c7c9e..b458f2f6f6 100644 --- a/libavformat/mp3dec.c +++ b/libavformat/mp3dec.c @@ -123,7 +123,7 @@ static int mp3_read_probe(const AVProbeData *p) else if (max_frames>200 && p->buf_size < 2*max_framesizes)return AVPROBE_SCORE_EXTENSION; else if (max_frames>=4 && p->buf_size < 2*max_framesizes) return AVPROBE_SCORE_EXTENSION / 2; else if (ff_id3v2_match(buf0, ID3v2_DEFAULT_MAGIC) && 2*ff_id3v2_tag_len(buf0) >= p->buf_size) - return p->buf_size < PROBE_BUF_MAX ? AVPROBE_SCORE_EXTENSION / 4 : AVPROBE_SCORE_EXTENSION - 2; + return p->buf_size < PROBE_BUF_DEFAULT ? 
AVPROBE_SCORE_EXTENSION / 4 : AVPROBE_SCORE_EXTENSION - 2; else if (first_frames > 1 && whole_used) return 5; else if (max_frames>=1 && p->buf_size < 10*max_framesizes) return 1; else return 0; -- 2.52.0 >From 9229e5359beffa334cd40ab88b5464c07cbc2c05 Mon Sep 17 00:00:00 2001 From: Nicolas Gaullier <[email protected]> Date: Thu, 5 Feb 2026 17:15:11 +0100 Subject: [PATCH 3/4] avformat/av_probe_input_buffer2: early ID3v2 detection to extend max_probe_size Currently, in case of huge ID3v2 tags, the format may be detected with a low score or even misdetected. Now, by default, skip the leading ID3v2 blocks and start counting probe bytes from there. This way, the probe score may improve, and AAC with huge ID3v2 tags can now be properly detected. It is assumed that it is no longer necessary to probe a buffer full of ID3v2 (without any media data) whereas previously the decision was to report an mp3 format if the buffer was big enough. Now, 'invalid data' is reported in such a case. See PR #21557 and 8d679bc7e665d23ffabee93554ee90212b7a7c20. Note that the lower-level API av_probe_input_format3() is kept unchanged. The behaviour of av_probe_input_buffer2() is also unchanged if max_probe_size is manually set (non-zero). Signed-off-by: Nicolas Gaullier <[email protected]> --- libavformat/format.c | 12 +++++++++++- libavformat/internal.h | 2 ++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/libavformat/format.c b/libavformat/format.c index 5c790b16f3..0a1f722469 100644 --- a/libavformat/format.c +++ b/libavformat/format.c @@ -260,12 +260,15 @@ int av_probe_input_buffer2(AVIOContext *pb, const AVInputFormat **fmt, AVProbeData pd = { filename ? 
filename : "" }; uint8_t *buf = NULL; int ret = 0, probe_size, buf_offset = 0; + unsigned int max_probeext_size = 0, max_probe_size0; int score = 0; int ret2; int eof = 0; - if (!max_probe_size) + if (!max_probe_size) { max_probe_size = PROBE_BUF_DEFAULT; + max_probeext_size = PROBE_BUFEXT_DEFAULT; + } else if (max_probe_size < PROBE_BUF_MIN) { av_log(logctx, AV_LOG_ERROR, "Specified probe size value %u cannot be < %u\n", max_probe_size, PROBE_BUF_MIN); @@ -286,6 +289,7 @@ int av_probe_input_buffer2(AVIOContext *pb, const AVInputFormat **fmt, } } + max_probe_size0 = max_probe_size; for (probe_size = PROBE_BUF_MIN; probe_size <= max_probe_size && !*fmt && !eof; probe_size = FFMIN(probe_size << 1, FFMAX(max_probe_size, probe_size + 1))) { @@ -305,6 +309,12 @@ int av_probe_input_buffer2(AVIOContext *pb, const AVInputFormat **fmt, eof = 1; } buf_offset += ret; + while (offset + ID3v2_HEADER_SIZE < buf_offset && ff_id3v2_match(&buf[offset], ID3v2_DEFAULT_MAGIC)) { + int id3len = ff_id3v2_tag_len(&buf[offset]); + if (max_probe_size + id3len <= max_probe_size0 + max_probeext_size) + max_probe_size += id3len; + offset += id3len; + } if (buf_offset < offset) continue; pd.buf_size = buf_offset - offset; diff --git a/libavformat/internal.h b/libavformat/internal.h index ebb7f94927..3962c1d626 100644 --- a/libavformat/internal.h +++ b/libavformat/internal.h @@ -32,6 +32,8 @@ /** size of probe buffer, for guessing file type from file contents */ #define PROBE_BUF_MIN 2048 #define PROBE_BUF_DEFAULT (1 << 20) +/** additional size of probe buffer to skip ID3v2 data */ +#define PROBE_BUFEXT_DEFAULT (4 << 20) #ifdef DEBUG # define hex_dump_debug(class, buf, size) av_hex_dump_log(class, AV_LOG_DEBUG, buf, size) -- 2.52.0 >From 138138ffe321ba6494dd99e7e208ee3458564dc5 Mon Sep 17 00:00:00 2001 From: Nicolas Gaullier <[email protected]> Date: Thu, 5 Feb 2026 18:04:20 +0100 Subject: [PATCH 4/4] avformat/mp3dec: remove ID3v2 detection The current detection has two output branches: - the 
first one with a low score is dead code as it is provided upstream by av_probe_input_format3() since 33dd3485ba1970979698d1542aa2374ca35f7163. - the second branch was introduced in 8d679bc7e665d23ffabee93554ee90212b7a7c20, and, since the previous commit, it is unreachable when using ffprobe. There is still a change when using the lower-level API av_probe_input_format3() directly: if the probe buffer is big enough and contains only ID3v2 bytes, the API will no longer report detection of an mp3 format. This seems appropriate for a low-level API since the format could also be AAC or anything else. Signed-off-by: Nicolas Gaullier <[email protected]> --- libavformat/mp3dec.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/libavformat/mp3dec.c b/libavformat/mp3dec.c index b458f2f6f6..912e5da502 100644 --- a/libavformat/mp3dec.c +++ b/libavformat/mp3dec.c @@ -122,8 +122,6 @@ static int mp3_read_probe(const AVProbeData *p) if (first_frames>=7) return AVPROBE_SCORE_EXTENSION + 1; else if (max_frames>200 && p->buf_size < 2*max_framesizes)return AVPROBE_SCORE_EXTENSION; else if (max_frames>=4 && p->buf_size < 2*max_framesizes) return AVPROBE_SCORE_EXTENSION / 2; - else if (ff_id3v2_match(buf0, ID3v2_DEFAULT_MAGIC) && 2*ff_id3v2_tag_len(buf0) >= p->buf_size) - return p->buf_size < PROBE_BUF_DEFAULT ? AVPROBE_SCORE_EXTENSION / 4 : AVPROBE_SCORE_EXTENSION - 2; else if (first_frames > 1 && whole_used) return 5; else if (max_frames>=1 && p->buf_size < 10*max_framesizes) return 1; else return 0; -- 2.52.0 _______________________________________________ ffmpeg-devel mailing list -- [email protected] To unsubscribe send an email to [email protected]
