[FFmpeg-devel] [PATCH] avfilter/af_whisper: fix srt index and int64 printf format (PR #20566)

Vittorio Palmisano via ffmpeg-devel Sun, 21 Sep 2025 06:29:20 -0700

PR #20566 opened by Vittorio Palmisano (vpalmisano)
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20566
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20566.patch


avfilter/af_whisper: fix srt index and int64 printf format

- Use PRId64 for printing int64_t values in log messages and in the SRT and JSON output.
- Increment the subtitle index for each segment rather than once per run_transcription() call.


From 8349df829eb64e972c0d58d622d4648a93cd27ef Mon Sep 17 00:00:00 2001
From: Vittorio Palmisano <[email protected]>
Date: Sun, 21 Sep 2025 15:19:51 +0200
Subject: [PATCH] avfilter/af_whisper: fix srt index and int64 printf format

- Use PRId64 for printing int64_t values in log messages and in the SRT and JSON output.
- Increment the subtitle index for each segment rather than once per run_transcription() call.
---
 libavfilter/af_whisper.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/libavfilter/af_whisper.c b/libavfilter/af_whisper.c
index 663fe446bb..4d61cc0c9a 100644
--- a/libavfilter/af_whisper.c
+++ b/libavfilter/af_whisper.c
@@ -150,7 +150,7 @@ static int init(AVFilterContext *ctx)
     }
 
     av_log(ctx, AV_LOG_INFO,
-           "Whisper filter initialized: model: %s lang: %s queue: %ld ms\n",
+           "Whisper filter initialized: model: %s lang: %s queue: %" PRId64 " ms\n",
            wctx->model_path, wctx->language, wctx->queue / 1000);
 
     return 0;
@@ -194,7 +194,7 @@ static void run_transcription(AVFilterContext *ctx, AVFrame *frame, int samples)
     const float duration = (float) samples / WHISPER_SAMPLE_RATE;
 
     av_log(ctx, AV_LOG_INFO,
-           "run transcription at %ld ms, %d/%d samples (%.2f seconds)...\n",
+           "run transcription at %" PRId64 " ms, %d/%d samples (%.2f seconds)...\n",
            timestamp_ms, samples, wctx->audio_buffer_fill_size, duration);
 
     struct whisper_full_params params = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);
@@ -228,7 +228,7 @@ static void run_transcription(AVFilterContext *ctx, AVFrame *frame, int samples)
         const int64_t t0_ms = whisper_full_get_segment_t0(wctx->ctx_wsp, i) * 10;
         const int64_t t1_ms = whisper_full_get_segment_t1(wctx->ctx_wsp, i) * 10;
 
-        av_log(ctx, AV_LOG_DEBUG, "  [%ld-%ld%s]: \"%s\"\n",
+        av_log(ctx, AV_LOG_DEBUG, "  [%" PRId64 "-%" PRId64 "%s]: \"%s\"\n",
                 timestamp_ms + t0_ms, timestamp_ms + t1_ms, turn ? " (turn)" : "", text_cleaned);
 
         if (segments_text) {
@@ -246,13 +246,13 @@ static void run_transcription(AVFilterContext *ctx, AVFrame *frame, int samples)
             if (!av_strcasecmp(wctx->format, "srt")) {
                 buf =
                     av_asprintf
-                    ("%d\n%02ld:%02ld:%02ld,%03ld --> %02ld:%02ld:%02ld,%03ld\n%s\n\n",
+                    ("%d\n%02" PRId64 ":%02" PRId64 ":%02" PRId64 ",%03" PRId64 " --> %02" PRId64 ":%02" PRId64 ":%02" PRId64 ",%03" PRId64 "\n%s\n\n",
                      wctx->index, start_t / 3600000,
                      (start_t / 60000) % 60, (start_t / 1000) % 60,
                      start_t % 1000, end_t / 3600000, (end_t / 60000) % 60,
                      (end_t / 1000) % 60, end_t % 1000, text_cleaned);
             } else if (!av_strcasecmp(wctx->format, "json")) {
-                buf = av_asprintf("{\"start\":%ld,\"end\":%ld,\"text\":\"%s\"}\n", start_t, end_t, text_cleaned);
+                buf = av_asprintf("{\"start\":%" PRId64 ",\"end\":%" PRId64 ",\"text\":\"%s\"}\n", start_t, end_t, text_cleaned);
             } else
                 buf = av_strdup(text_cleaned);
 
@@ -260,13 +260,13 @@ static void run_transcription(AVFilterContext *ctx, AVFrame *frame, int samples)
                 avio_write(wctx->avio_context, buf, strlen(buf));
                 av_freep(&buf);
             }
+
+            wctx->index++;
         }
 
         av_freep(&text_cleaned);
     }
 
-    wctx->index++;
-
     AVDictionary **metadata = &frame->metadata;
     if (metadata && segments_text) {
         av_dict_set(metadata, "lavfi.whisper.text", segments_text, 0);
-- 
2.49.1

_______________________________________________
ffmpeg-devel mailing list -- [email protected]
To unsubscribe send an email to [email protected]

[FFmpeg-devel] [PATCH] avfilter/af_whisper: fix srt index and int64 printf format (PR #20566)

Reply via email to