PR #21662 opened by ngaullier
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/21662
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/21662.patch

Typical use case example: https://samples.ffmpeg.org/A-codecs/MP3/short.mups
This sample has 2M of ID3v2 data whereas the default probe size is 1M.
It is still supported because the current decision is to simply default to mp3
in such a case.
So here is a proposal to make this cleaner and future-proof (e.g. AAC support).

Tagged as WIP at the moment, since the third commit should certainly include a
minor version bump.


>From 5ccecd1716f4b5ef7b65f594d8d6815794df11ff Mon Sep 17 00:00:00 2001
From: Nicolas Gaullier <[email protected]>
Date: Thu, 5 Feb 2026 16:29:12 +0100
Subject: [PATCH 1/4] avformat: clarify formatprobesize AVOption

max_probe_size defaults to PROBE_BUF_MAX in av_probe_input_buffer2().
The current code also sets the AVOption to PROBE_BUF_MAX by default, but
this is not documented, so it is better to set it to 0 (auto): this is more
flexible and will be useful in the following commits.

Fix missing documentation.

Signed-off-by: Nicolas Gaullier <[email protected]>
---
 doc/formats.texi            | 9 +++++++++
 libavformat/options_table.h | 2 +-
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/doc/formats.texi b/doc/formats.texi
index 876a9e92b3..d11b5ca9fe 100644
--- a/doc/formats.texi
+++ b/doc/formats.texi
@@ -21,6 +21,15 @@ Possible values:
 Reduce buffering.
 @end table
 
+@item formatprobesize @var{unsigned int} (@emph{input})
+Maximum number of bytes read from input in order to identify the
+input format. Only used when the format is not set
+explicitly by the caller.
+A limited value may result in a low probe score, misdetection, or
+no detection at all.
+Must be an integer not less than 2048, or 0 (auto).
+Default value is 0.
+
 @item probesize @var{integer} (@emph{input})
 Set probing size in bytes, i.e. the size of the data to analyze to get
 stream information. A higher value will enable detecting more
diff --git a/libavformat/options_table.h b/libavformat/options_table.h
index 5047b5ce50..7cef593508 100644
--- a/libavformat/options_table.h
+++ b/libavformat/options_table.h
@@ -37,7 +37,7 @@ static const AVOption avformat_options[] = {
 {"avioflags", NULL, OFFSET(avio_flags), AV_OPT_TYPE_FLAGS, {.i64 = DEFAULT }, 
INT_MIN, INT_MAX, D|E, .unit = "avioflags"},
 {"direct", "reduce buffering", 0, AV_OPT_TYPE_CONST, {.i64 = AVIO_FLAG_DIRECT 
}, INT_MIN, INT_MAX, D|E, .unit = "avioflags"},
 {"probesize", "set probing size", OFFSET(probesize), AV_OPT_TYPE_INT64, {.i64 
= 5000000 }, 32, (double)INT64_MAX, D},
-{"formatprobesize", "number of bytes to probe file format", 
OFFSET(format_probesize), AV_OPT_TYPE_INT, {.i64 = PROBE_BUF_MAX}, 0, 
INT_MAX-1, D},
+{"formatprobesize", "number of bytes to probe file format", 
OFFSET(format_probesize), AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX-1, D},
 {"packetsize", "set packet size", OFFSET(packet_size), AV_OPT_TYPE_INT, {.i64 
= DEFAULT }, 0, INT_MAX, E},
 {"fflags", NULL, OFFSET(flags), AV_OPT_TYPE_FLAGS, {.i64 = AVFMT_FLAG_AUTO_BSF 
}, INT_MIN, INT_MAX, D|E, .unit = "fflags"},
 {"flush_packets", "reduce the latency by flushing out packets immediately", 0, 
AV_OPT_TYPE_CONST, {.i64 = AVFMT_FLAG_FLUSH_PACKETS }, INT_MIN, INT_MAX, E, 
.unit = "fflags"},
-- 
2.52.0


>From 9daa7293114a7415bdc88e1905cfea3ada7642d1 Mon Sep 17 00:00:00 2001
From: Nicolas Gaullier <[email protected]>
Date: Thu, 5 Feb 2026 17:10:39 +0100
Subject: [PATCH 2/4] avformat: rename PROBE_BUF_MAX to PROBE_BUF_DEFAULT

Signed-off-by: Nicolas Gaullier <[email protected]>
---
 libavformat/format.c   | 4 ++--
 libavformat/internal.h | 2 +-
 libavformat/mp3dec.c   | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/libavformat/format.c b/libavformat/format.c
index 83aa980a15..5c790b16f3 100644
--- a/libavformat/format.c
+++ b/libavformat/format.c
@@ -179,7 +179,7 @@ const AVInputFormat *av_probe_input_format3(const 
AVProbeData *pd,
                 nodat = ID3_ALMOST_GREATER_PROBE;
             lpd.buf      += id3len;
             lpd.buf_size -= id3len;
-        } else if (id3len >= PROBE_BUF_MAX) {
+        } else if (id3len >= PROBE_BUF_DEFAULT) {
             nodat = ID3_GREATER_MAX_PROBE;
             break;
         } else {
@@ -265,7 +265,7 @@ int av_probe_input_buffer2(AVIOContext *pb, const 
AVInputFormat **fmt,
     int eof = 0;
 
     if (!max_probe_size)
-        max_probe_size = PROBE_BUF_MAX;
+        max_probe_size = PROBE_BUF_DEFAULT;
     else if (max_probe_size < PROBE_BUF_MIN) {
         av_log(logctx, AV_LOG_ERROR,
                "Specified probe size value %u cannot be < %u\n", 
max_probe_size, PROBE_BUF_MIN);
diff --git a/libavformat/internal.h b/libavformat/internal.h
index 64452cce6e..ebb7f94927 100644
--- a/libavformat/internal.h
+++ b/libavformat/internal.h
@@ -31,7 +31,7 @@
 
 /** size of probe buffer, for guessing file type from file contents */
 #define PROBE_BUF_MIN 2048
-#define PROBE_BUF_MAX (1 << 20)
+#define PROBE_BUF_DEFAULT (1 << 20)
 
 #ifdef DEBUG
 #    define hex_dump_debug(class, buf, size) av_hex_dump_log(class, 
AV_LOG_DEBUG, buf, size)
diff --git a/libavformat/mp3dec.c b/libavformat/mp3dec.c
index 5b153c7c9e..b458f2f6f6 100644
--- a/libavformat/mp3dec.c
+++ b/libavformat/mp3dec.c
@@ -123,7 +123,7 @@ static int mp3_read_probe(const AVProbeData *p)
     else if (max_frames>200 && p->buf_size < 2*max_framesizes)return 
AVPROBE_SCORE_EXTENSION;
     else if (max_frames>=4 && p->buf_size < 2*max_framesizes) return 
AVPROBE_SCORE_EXTENSION / 2;
     else if (ff_id3v2_match(buf0, ID3v2_DEFAULT_MAGIC) && 
2*ff_id3v2_tag_len(buf0) >= p->buf_size)
-                           return p->buf_size < PROBE_BUF_MAX ? 
AVPROBE_SCORE_EXTENSION / 4 : AVPROBE_SCORE_EXTENSION - 2;
+                           return p->buf_size < PROBE_BUF_DEFAULT ? 
AVPROBE_SCORE_EXTENSION / 4 : AVPROBE_SCORE_EXTENSION - 2;
     else if (first_frames > 1 && whole_used) return 5;
     else if (max_frames>=1 && p->buf_size < 10*max_framesizes) return 1;
     else                   return 0;
-- 
2.52.0


>From 9229e5359beffa334cd40ab88b5464c07cbc2c05 Mon Sep 17 00:00:00 2001
From: Nicolas Gaullier <[email protected]>
Date: Thu, 5 Feb 2026 17:15:11 +0100
Subject: [PATCH 3/4] avformat/av_probe_input_buffer2: early ID3v2 detection to
 extend max_probe_size

Currently, in case of huge ID3v2 tags, the format may be detected with
a low score or even misdetected.

Now, by default, skip the leading ID3v2 blocks and start counting probe
bytes from here. This way, the probe score may improve, and aac with
huge ID3v2 tags can now be properly detected.

It is assumed that it is no longer necessary to probe a buffer full of ID3v2
data (without any media data), whereas previously the decision was to report
an mp3 format if the buffer was big enough. Now, 'invalid data' is
reported in such a case. See PR #21557 and
8d679bc7e665d23ffabee93554ee90212b7a7c20.

Note that the lower level API av_probe_input_format3() is kept unchanged.
The behaviour of av_probe_input_buffer2() is also unchanged if
max_probe_size is manually set (non-zero).

Signed-off-by: Nicolas Gaullier <[email protected]>
---
 libavformat/format.c   | 12 +++++++++++-
 libavformat/internal.h |  2 ++
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/libavformat/format.c b/libavformat/format.c
index 5c790b16f3..0a1f722469 100644
--- a/libavformat/format.c
+++ b/libavformat/format.c
@@ -260,12 +260,15 @@ int av_probe_input_buffer2(AVIOContext *pb, const 
AVInputFormat **fmt,
     AVProbeData pd = { filename ? filename : "" };
     uint8_t *buf = NULL;
     int ret = 0, probe_size, buf_offset = 0;
+    unsigned int max_probeext_size = 0, max_probe_size0;
     int score = 0;
     int ret2;
     int eof = 0;
 
-    if (!max_probe_size)
+    if (!max_probe_size) {
         max_probe_size = PROBE_BUF_DEFAULT;
+        max_probeext_size = PROBE_BUFEXT_DEFAULT;
+    }
     else if (max_probe_size < PROBE_BUF_MIN) {
         av_log(logctx, AV_LOG_ERROR,
                "Specified probe size value %u cannot be < %u\n", 
max_probe_size, PROBE_BUF_MIN);
@@ -286,6 +289,7 @@ int av_probe_input_buffer2(AVIOContext *pb, const 
AVInputFormat **fmt,
         }
     }
 
+    max_probe_size0 = max_probe_size;
     for (probe_size = PROBE_BUF_MIN; probe_size <= max_probe_size && !*fmt && 
!eof;
          probe_size = FFMIN(probe_size << 1,
                             FFMAX(max_probe_size, probe_size + 1))) {
@@ -305,6 +309,12 @@ int av_probe_input_buffer2(AVIOContext *pb, const 
AVInputFormat **fmt,
             eof   = 1;
         }
         buf_offset += ret;
+        while (offset + ID3v2_HEADER_SIZE < buf_offset && 
ff_id3v2_match(&buf[offset], ID3v2_DEFAULT_MAGIC)) {
+            int id3len = ff_id3v2_tag_len(&buf[offset]);
+            if (max_probe_size + id3len <= max_probe_size0 + max_probeext_size)
+                max_probe_size += id3len;
+            offset += id3len;
+        }
         if (buf_offset < offset)
             continue;
         pd.buf_size = buf_offset - offset;
diff --git a/libavformat/internal.h b/libavformat/internal.h
index ebb7f94927..3962c1d626 100644
--- a/libavformat/internal.h
+++ b/libavformat/internal.h
@@ -32,6 +32,8 @@
 /** size of probe buffer, for guessing file type from file contents */
 #define PROBE_BUF_MIN 2048
 #define PROBE_BUF_DEFAULT (1 << 20)
+/** additional size of probe buffer to skip ID3v2 data */
+#define PROBE_BUFEXT_DEFAULT (4 << 20)
 
 #ifdef DEBUG
 #    define hex_dump_debug(class, buf, size) av_hex_dump_log(class, 
AV_LOG_DEBUG, buf, size)
-- 
2.52.0


>From 138138ffe321ba6494dd99e7e208ee3458564dc5 Mon Sep 17 00:00:00 2001
From: Nicolas Gaullier <[email protected]>
Date: Thu, 5 Feb 2026 18:04:20 +0100
Subject: [PATCH 4/4] avformat/mp3dec: remove ID3v2 detection

The current detection has two output branches:
- the first one with a low score is dead code as it is provided
upstream by av_probe_input_format3() since 
33dd3485ba1970979698d1542aa2374ca35f7163.

- the second branch was introduced in 8d679bc7e665d23ffabee93554ee90212b7a7c20,
and, since the previous commit, it is unreachable when using ffprobe.
There is still a change when using the lower-level API
av_probe_input_format3() directly: if the probe buffer is big enough
and contains only ID3v2 bytes, the API will no longer report detection
of an mp3 format. This seems quite appropriate for a low-level API
since the format could also be aac or anything else.

Signed-off-by: Nicolas Gaullier <[email protected]>
---
 libavformat/mp3dec.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/libavformat/mp3dec.c b/libavformat/mp3dec.c
index b458f2f6f6..912e5da502 100644
--- a/libavformat/mp3dec.c
+++ b/libavformat/mp3dec.c
@@ -122,8 +122,6 @@ static int mp3_read_probe(const AVProbeData *p)
     if   (first_frames>=7) return AVPROBE_SCORE_EXTENSION + 1;
     else if (max_frames>200 && p->buf_size < 2*max_framesizes)return 
AVPROBE_SCORE_EXTENSION;
     else if (max_frames>=4 && p->buf_size < 2*max_framesizes) return 
AVPROBE_SCORE_EXTENSION / 2;
-    else if (ff_id3v2_match(buf0, ID3v2_DEFAULT_MAGIC) && 
2*ff_id3v2_tag_len(buf0) >= p->buf_size)
-                           return p->buf_size < PROBE_BUF_DEFAULT ? 
AVPROBE_SCORE_EXTENSION / 4 : AVPROBE_SCORE_EXTENSION - 2;
     else if (first_frames > 1 && whole_used) return 5;
     else if (max_frames>=1 && p->buf_size < 10*max_framesizes) return 1;
     else                   return 0;
-- 
2.52.0

_______________________________________________
ffmpeg-devel mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to