This is an automated email from the git hooks/post-receive script.
A commit was pushed to the master branch
of the ffmpeg repository.
The following commit(s) were added to refs/heads/master by this push:
new 482395f830 avfilter/af_whisper: Add translate parameter
Commit 482395f830 is described below.
commit 482395f830a18686d23c12f783b7ea927c2f2bdb
Author: WyattBlue <[email protected]>
AuthorDate: Mon Mar 9 00:23:36 2026 -0400
Commit: Gyan Doshi <[email protected]>
CommitDate: Sun Mar 15 06:53:19 2026 +0000
avfilter/af_whisper: Add translate parameter
---
doc/filters.texi | 5 +++++
libavfilter/af_whisper.c | 15 +++++++++++++++
2 files changed, 20 insertions(+)
diff --git a/doc/filters.texi b/doc/filters.texi
index 7f0c3cb99c..973a93345d 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -7743,6 +7743,11 @@ The file path of the downloaded whisper.cpp model
(mandatory).
The language to use for transcription ('auto' for auto-detect).
Default value: @code{"auto"}
+@item translate
+If enabled, translate the transcription from the source language to English. A
+multilingual model is required to enable this option.
+Default value: @code{"false"}
+
@item queue
The maximum size that will be queued into the filter before processing the
audio
with whisper. Using a small value the audio stream will be processed more
often,
diff --git a/libavfilter/af_whisper.c b/libavfilter/af_whisper.c
index 299a8bca7a..cb1c7b2ecf 100644
--- a/libavfilter/af_whisper.c
+++ b/libavfilter/af_whisper.c
@@ -42,6 +42,7 @@ typedef struct WhisperContext {
const AVClass *class;
char *model_path;
char *language;
+ bool translate;
bool use_gpu;
int gpu_device;
char *vad_model_path;
@@ -150,6 +151,18 @@ static int init(AVFilterContext *ctx)
wctx->avio_context->direct = AVIO_FLAG_DIRECT;
}
+ if (!whisper_is_multilingual(wctx->ctx_wsp)) {
+ if (!wctx->translate && strcmp(wctx->language, "auto") == 0) {
+ av_log(ctx, AV_LOG_WARNING,
+ "Multilingual model not provided. Non-English audio may not
be correctly transcribed.\n");
+ } else if (wctx->translate || (strcmp(wctx->language, "auto") != 0 &&
strcmp(wctx->language, "en") != 0)) {
+ av_log(ctx, AV_LOG_ERROR,
+ "%s requested but multilingual model not provided.\n",
wctx->translate ? "Translation" : "Transcription");
+ return AVERROR(ENOSYS);
+ }
+ wctx->language = "en";
+ }
+
av_log(ctx, AV_LOG_INFO,
"Whisper filter initialized: model: %s lang: %s queue: %" PRId64 "
ms\n",
wctx->model_path, wctx->language, wctx->queue / 1000);
@@ -200,6 +213,7 @@ static void run_transcription(AVFilterContext *ctx, AVFrame
*frame, int samples)
struct whisper_full_params params =
whisper_full_default_params(WHISPER_SAMPLING_GREEDY);
params.language = wctx->language;
+ params.translate = wctx->translate;
params.n_threads = ff_filter_get_nb_threads(ctx);
params.print_special = 0;
params.print_progress = 0;
@@ -443,6 +457,7 @@ static int query_formats(const AVFilterContext *ctx,
static const AVOption whisper_options[] = {
{ "model", "Path to the whisper.cpp model file", OFFSET(model_path),
AV_OPT_TYPE_STRING,.flags = FLAGS },
{ "language", "Language for transcription ('auto' for auto-detect)",
OFFSET(language), AV_OPT_TYPE_STRING, {.str = "auto"}, .flags = FLAGS },
+ { "translate", "Translate from source language to English",
OFFSET(translate), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, .flags = FLAGS },
{ "queue", "Audio queue size", OFFSET(queue), AV_OPT_TYPE_DURATION, {.i64
= 3000000}, 20000, HOURS, .flags = FLAGS },
{ "use_gpu", "Use GPU for processing", OFFSET(use_gpu), AV_OPT_TYPE_BOOL,
{.i64 = 1}, 0, 1, .flags = FLAGS },
{ "gpu_device", "GPU device to use", OFFSET(gpu_device), AV_OPT_TYPE_INT,
{.i64 = 0}, 0, INT_MAX, .flags = FLAGS },
_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]