PR #21709 opened by cenzhanquan1 URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/21709 Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/21709.patch
1. add simple fade when volume. 2. do fade when adjust volume to maximal. when to adjust volume we hope that the audio sample is smooth we need to calculate the gradient step between each sample based on the total change of the gradient (i.e. dst_volume - src_volume) and the number of samples (nb_samples) and apply it to the target sample. Signed-off-by: cenzhanquan1 <[email protected]> >From f7545e0bf8979d3a91b6f9484b656abaafa50691 Mon Sep 17 00:00:00 2001 From: cenzhanquan1 <[email protected]> Date: Tue, 10 Feb 2026 11:29:12 +0800 Subject: [PATCH] libavfilter/volume: do fade when adjusting the volume. 1. add simple fade when volume. 2. do fade when adjust volume to maximal. when to adjust volume we hope that the audio sample is smooth we need to calculate the gradient step between each sample based on the total change of the gradient (i.e. dst_volume - src_volume) and the number of samples (nb_samples) and apply it to the target sample. Signed-off-by: cenzhanquan1 <[email protected]> --- libavfilter/af_volume.c | 211 ++++++++++++++++++++++++++++++++++++---- libavfilter/af_volume.h | 9 ++ 2 files changed, 201 insertions(+), 19 deletions(-) diff --git a/libavfilter/af_volume.c b/libavfilter/af_volume.c index cd40014345..7016c1950d 100644 --- a/libavfilter/af_volume.c +++ b/libavfilter/af_volume.c @@ -84,6 +84,8 @@ static const AVOption volume_options[] = { OFFSET(replaygain_preamp), AV_OPT_TYPE_DOUBLE, { .dbl = 0.0 }, -15.0, 15.0, A|F }, { "replaygain_noclip", "Apply replaygain clipping prevention", OFFSET(replaygain_noclip), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, A|F }, + { "transition", "transition time in seconds when volume changes via command", + OFFSET(transition), AV_OPT_TYPE_DOUBLE, { .dbl = 0.0 }, 0, 60.0, A|F|T }, { NULL } }; @@ -170,6 +172,22 @@ static inline void scale_samples_u8(uint8_t *dst, const uint8_t *src, dst[i] = av_clip_uint8(((((int64_t)src[i] - 128) * volume + 128) >> 8) + 128); } +static inline void fade_samples_u8(uint8_t *dst, const uint8_t *src, + int nb_samples, int chs, int dst_volume, int src_volume) +{ + int i, j, k = 0; + int64_t sample; + int64_t step; + + step = (((int64_t)dst_volume - src_volume) * 256) / nb_samples; + for (i = 0; i < nb_samples; i++) { + for (j = 0; j < chs; j++, k++) { + sample = (int64_t)(src[k] - 128) * (src_volume + (step * i) >> 8) + 128; + dst[k] = av_clip_uint8((sample >> 8) + 128); + } + } +} + static inline void scale_samples_u8_small(uint8_t *dst, const uint8_t *src, int nb_samples, int volume) { @@ -178,6 +196,22 @@ static inline void scale_samples_u8_small(uint8_t *dst, const uint8_t *src, dst[i] = av_clip_uint8((((src[i] - 128) * volume + 128) >> 8) + 128); } +static inline void fade_samples_u8_small(uint8_t *dst, const uint8_t *src, + int nb_samples, int chs, int dst_volume, int src_volume) +{ + int i, j, k = 0; + int sample; + int step; + + step = ((dst_volume - src_volume) * 256) / nb_samples; + for (i = 0; i < nb_samples; i++) { + for (j = 0; j < chs; j++, k++) { + sample = (src[k] - 128) * (src_volume + (step * i >> 8)) + 128; + dst[k] = av_clip_uint8((sample >> 8) + 128); + } + } +} + static inline void scale_samples_s16(uint8_t *dst, const uint8_t *src, int nb_samples, int volume) { @@ -188,6 +222,22 @@ static inline void scale_samples_s16(uint8_t *dst, const uint8_t *src, smp_dst[i] = av_clip_int16(((int64_t)smp_src[i] * volume + 128) >> 8); } +static inline void fade_samples_s16(uint8_t *dst, const uint8_t *src, + int nb_samples, int chs, int dst_volume, int src_volume) +{ + const int16_t *smp_src = (const int16_t *)src; + int16_t *smp_dst = (int16_t *)dst; + int i, j, k = 0; + int64_t step; + + step = (((int64_t)dst_volume - src_volume) * 256) / nb_samples; + for (i = 0; i < nb_samples; i++) { + for (j = 0; j < chs; j++, k++) { + smp_dst[k] = av_clip_int16((int64_t)(smp_src[k] * (src_volume + (step * i >> 8)) + 128) >> 8); + } + } +} + static inline void scale_samples_s16_small(uint8_t *dst, const uint8_t *src, int nb_samples, int volume) { @@ -198,6 +248,22 @@ static inline void scale_samples_s16_small(uint8_t *dst, const uint8_t *src, smp_dst[i] = av_clip_int16((smp_src[i] * volume + 128) >> 8); } +static inline void fade_samples_s16_small(uint8_t *dst, const uint8_t *src, + int nb_samples, int chs, int dst_volume, int src_volume) +{ + const int16_t *smp_src = (const int16_t *)src; + int16_t *smp_dst = (int16_t *)dst; + int i, j, k = 0; + int step; + + step = (((int64_t)dst_volume - src_volume) * 256) / nb_samples; + for (i = 0; i < nb_samples; i++) { + for (j = 0; j < chs; j++, k++) { + smp_dst[k] = av_clip_int16((int64_t)(smp_src[k] * (src_volume + (step * i >> 8)) + 128) >> 8); + } + } +} + static inline void scale_samples_s32(uint8_t *dst, const uint8_t *src, int nb_samples, int volume) { @@ -208,25 +274,48 @@ static inline void scale_samples_s32(uint8_t *dst, const uint8_t *src, smp_dst[i] = av_clipl_int32((((int64_t)smp_src[i] * volume + 128) >> 8)); } +static inline void fade_samples_s32(uint8_t *dst, const uint8_t *src, + int nb_samples, int chs, int dst_volume, int src_volume) +{ + const int32_t *smp_src = (const int32_t *)src; + int32_t *smp_dst = (int32_t *)dst; + int i, j, k = 0; + int64_t step; + + step = (((int64_t)dst_volume - src_volume) * 256) / nb_samples; + for (i = 0; i < nb_samples; i++) { + for (j = 0; j < chs; j++, k++) { + smp_dst[k] = av_clipl_int32((int64_t)(smp_src[k] * (src_volume + (step * i >> 8)) + 128) >> 8); + } + } +} + static av_cold void volume_init(VolumeContext *vol) { vol->samples_align = 1; switch (av_get_packed_sample_fmt(vol->sample_fmt)) { case AV_SAMPLE_FMT_U8: - if (vol->volume_i < 0x1000000) + if (vol->volume_i < 0x1000000) { vol->scale_samples = scale_samples_u8_small; - else + vol->fade_samples = fade_samples_u8_small; + } else { vol->scale_samples = scale_samples_u8; + vol->fade_samples = fade_samples_u8; + } break; case AV_SAMPLE_FMT_S16: - if (vol->volume_i < 0x10000) + if (vol->volume_i < 0x10000) { vol->scale_samples = scale_samples_s16_small; - else + vol->fade_samples = fade_samples_s16_small; + } else { vol->scale_samples = scale_samples_s16; + vol->fade_samples = fade_samples_s16; + } break; case AV_SAMPLE_FMT_S32: vol->scale_samples = scale_samples_s32; + vol->fade_samples = fade_samples_s32; break; case AV_SAMPLE_FMT_FLT: vol->samples_align = 4; @@ -313,8 +402,18 @@ static int process_command(AVFilterContext *ctx, const char *cmd, const char *ar if (!strcmp(cmd, "volume")) { if ((ret = set_expr(&vol->volume_pexpr, args, ctx)) < 0) return ret; - if (vol->eval_mode == EVAL_MODE_ONCE) + if (vol->eval_mode == EVAL_MODE_ONCE) { + vol->volume_isrc = vol->volume_i; + vol->volume_src = vol->volume; set_volume(ctx); + if (vol->transition > 0 && vol->var_values[VAR_SAMPLE_RATE] > 0) { + vol->fade_total = (int)(vol->transition * vol->var_values[VAR_SAMPLE_RATE] + 0.5); + vol->fade_remaining = vol->fade_total; + } else { + vol->fade_total = 0; + vol->fade_remaining = 0; + } + } } return ret; @@ -380,7 +479,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *buf) if (vol->eval_mode == EVAL_MODE_FRAME) set_volume(ctx); - if (vol->volume == 1.0 || vol->volume_i == 256) { + if ((vol->volume == 1.0 || vol->volume_i == 256) && vol->fade_remaining <= 0) { out_buf = buf; goto end; } @@ -405,29 +504,103 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *buf) if (vol->precision != PRECISION_FIXED || vol->volume_i > 0) { int p, plane_samples; + int planar = av_sample_fmt_is_planar(buf->format); - if (av_sample_fmt_is_planar(buf->format)) + if (planar) plane_samples = FFALIGN(nb_samples, vol->samples_align); else plane_samples = FFALIGN(nb_samples * vol->channels, vol->samples_align); if (vol->precision == PRECISION_FIXED) { - for (p = 0; p < vol->planes; p++) { - vol->scale_samples(out_buf->extended_data[p], - buf->extended_data[p], plane_samples, - vol->volume_i); + if (vol->fade_remaining > 0 && vol->fade_samples && vol->fade_total > 0) { + int fade_done = vol->fade_total - vol->fade_remaining; + int fade_count = FFMIN(nb_samples, vol->fade_remaining); + int chs = planar ? 1 : vol->channels; + int vol_start = vol->volume_isrc + + (int64_t)(vol->volume_i - vol->volume_isrc) * fade_done / vol->fade_total; + int vol_end; + + if (fade_count >= nb_samples) { + /* Entire frame is within the fade region */ + vol_end = vol->volume_isrc + + (int64_t)(vol->volume_i - vol->volume_isrc) * (fade_done + fade_count) / vol->fade_total; + for (p = 0; p < vol->planes; p++) { + vol->fade_samples(out_buf->extended_data[p], buf->extended_data[p], + plane_samples, chs, vol_end, vol_start); + } + } else { + /* Fade ends mid-frame: fade the whole frame from vol_start to target volume. + * This slightly extends the fade to cover the full frame, which is acceptable + * since the difference is at most one frame duration. */ + vol_end = vol->volume_i; + for (p = 0; p < vol->planes; p++) { + vol->fade_samples(out_buf->extended_data[p], buf->extended_data[p], + plane_samples, chs, vol_end, vol_start); + } + } + vol->fade_remaining -= fade_count; + } else { + for (p = 0; p < vol->planes; p++) { + vol->scale_samples(out_buf->extended_data[p], + buf->extended_data[p], plane_samples, + vol->volume_i); + } } } else if (av_get_packed_sample_fmt(vol->sample_fmt) == AV_SAMPLE_FMT_FLT) { - for (p = 0; p < vol->planes; p++) { - vol->fdsp->vector_fmul_scalar((float *)out_buf->extended_data[p], - (const float *)buf->extended_data[p], - vol->volume, plane_samples); + if (vol->fade_remaining > 0 && vol->fade_total > 0) { + int fade_done = vol->fade_total - vol->fade_remaining; + int fade_count = FFMIN(nb_samples, vol->fade_remaining); + int chs = planar ? 1 : vol->channels; + for (p = 0; p < vol->planes; p++) { + float *dst = (float *)out_buf->extended_data[p]; + const float *src = (const float *)buf->extended_data[p]; + int i, j, k = 0; + for (i = 0; i < nb_samples; i++) { + float t; + if (i < fade_count) + t = (float)(fade_done + i) / vol->fade_total; + else + t = 1.0f; + float v = vol->volume_src + (vol->volume - vol->volume_src) * t; + for (j = 0; j < chs; j++, k++) + dst[k] = src[k] * v; + } + } + vol->fade_remaining -= fade_count; + } else { + for (p = 0; p < vol->planes; p++) { + vol->fdsp->vector_fmul_scalar((float *)out_buf->extended_data[p], + (const float *)buf->extended_data[p], + vol->volume, plane_samples); + } } } else { - for (p = 0; p < vol->planes; p++) { - vol->fdsp->vector_dmul_scalar((double *)out_buf->extended_data[p], - (const double *)buf->extended_data[p], - vol->volume, plane_samples); + if (vol->fade_remaining > 0 && vol->fade_total > 0) { + int fade_done = vol->fade_total - vol->fade_remaining; + int fade_count = FFMIN(nb_samples, vol->fade_remaining); + int chs = planar ? 1 : vol->channels; + for (p = 0; p < vol->planes; p++) { + double *dst = (double *)out_buf->extended_data[p]; + const double *src = (const double *)buf->extended_data[p]; + int i, j, k = 0; + for (i = 0; i < nb_samples; i++) { + double t; + if (i < fade_count) + t = (double)(fade_done + i) / vol->fade_total; + else + t = 1.0; + double v = vol->volume_src + (vol->volume - vol->volume_src) * t; + for (j = 0; j < chs; j++, k++) + dst[k] = src[k] * v; + } + } + vol->fade_remaining -= fade_count; + } else { + for (p = 0; p < vol->planes; p++) { + vol->fdsp->vector_dmul_scalar((double *)out_buf->extended_data[p], + (const double *)buf->extended_data[p], + vol->volume, plane_samples); + } } } } diff --git a/libavfilter/af_volume.h b/libavfilter/af_volume.h index e9527eea8a..b172bd3388 100644 --- a/libavfilter/af_volume.h +++ b/libavfilter/af_volume.h @@ -84,7 +84,16 @@ typedef struct VolumeContext { void (*scale_samples)(uint8_t *dst, const uint8_t *src, int nb_samples, int volume); + + void (*fade_samples)(uint8_t *dst, const uint8_t *src, int nb_samples, int chs, + int dst_volume, int src_volume); int samples_align; + + double transition; ///< transition time in seconds when volume changes (0 = instant) + int fade_total; ///< total number of samples for the fade transition + int fade_remaining; ///< remaining samples to fade + int volume_isrc; ///< source volume level at the start of fade + double volume_src; ///< source volume (float/double) at the start of fade } VolumeContext; void ff_volume_init_x86(VolumeContext *vol); -- 2.52.0 _______________________________________________ ffmpeg-devel mailing list -- [email protected] To unsubscribe send an email to [email protected]
