af_volumedetect.c: Add 32bit float audio support

Yigithan Yigit Tue, 18 Jun 2024 02:17:28 -0700


> On Jun 17, 2024, at 5:52 PM, Rémi Denis-Courmont <[email protected]> wrote:
> 
> 
> 
> Le 17 juin 2024 13:18:11 GMT+02:00, Yigithan Yigit 
> <[email protected] <mailto:[email protected]>> a écrit :
>> ---
>> libavfilter/af_volumedetect.c | 159 ++++++++++++++++++++++++++++------
>> 1 file changed, 133 insertions(+), 26 deletions(-)
>> 
>> diff --git a/libavfilter/af_volumedetect.c b/libavfilter/af_volumedetect.c
>> index 327801a7f9..dbbcd037a5 100644
>> --- a/libavfilter/af_volumedetect.c
>> +++ b/libavfilter/af_volumedetect.c
>> @@ -20,27 +20,51 @@
>> 
>> #include "libavutil/channel_layout.h"
>> #include "libavutil/avassert.h"
>> +#include "libavutil/mem.h"
>> #include "audio.h"
>> #include "avfilter.h"
>> #include "internal.h"
>> 
>> +#define MAX_DB_FLT 1024
>> #define MAX_DB 91
>> +#define HISTOGRAM_SIZE 0x10000
>> +#define HISTOGRAM_SIZE_FLT (MAX_DB_FLT*2)
>> 
>> typedef struct VolDetectContext {
>> -    /**
>> -     * Number of samples at each PCM value.
>> -     * histogram[0x8000 + i] is the number of samples at value i.
>> -     * The extra element is there for symmetry.
>> -     */
>> -    uint64_t histogram[0x10001];
>> +    uint64_t* histogram; ///< for integer number of samples at each PCM 
>> value, for float number of samples at each dB
>> +    uint64_t nb_samples; ///< number of samples
>> +    double sum2;         ///< sum of the squares of the samples
>> +    double max;          ///< maximum sample value
>> +    int is_float;        ///< true if the input is in floating point
>> } VolDetectContext;
>> 
>> -static inline double logdb(uint64_t v)
>> +static inline double logdb(double v, enum AVSampleFormat sample_fmt)
>> {
>> -    double d = v / (double)(0x8000 * 0x8000);
>> -    if (!v)
>> -        return MAX_DB;
>> -    return -log10(d) * 10;
>> +    if (sample_fmt == AV_SAMPLE_FMT_FLT) {
>> +        if (!v)
>> +            return MAX_DB_FLT;
>> +        return -log10(v) * 10;
>> +    } else {
>> +        double d = v / (double)(0x8000 * 0x8000);
>> +        if (!v)
>> +            return MAX_DB;
>> +        return -log10(d) * 10;
>> +    }
>> +}
>> +
>> +static void update_float_stats(VolDetectContext *vd, float *audio_data)
>> +{
>> +    double sample;
>> +    int idx;
>> +    if(!isnormal(*audio_data))
>> +        return;
> 
> Do we really need to classify floats here? That's probably going to hurt 
> perfs badly, and makes an otherwise very vectorisable function not so easily 
> vectored.


You could be correct, we might consider checking NaN, Inf+/- values. Otherwise 
there is a risk of a crash if someone uses something like this 
“aelvalsrc=3.4e39”.

> 
>> +    sample = fabsf(*audio_data);
>> +    if (sample > vd->max)
>> +        vd->max = sample;
>> +    vd->sum2 += sample * sample;
>> +    idx = lrintf(floorf(logdb(sample * sample, AV_SAMPLE_FMT_FLT))) + 
>> MAX_DB_FLT;
> 
> You're recomputing the same value again, and you seem to be rounding twice in 
> a row?

I missed, fixed.

> 
>> +    vd->histogram[idx]++;
>> +    vd->nb_samples++;
>> }
>> 
>> static int filter_frame(AVFilterLink *inlink, AVFrame *samples)
>> @@ -51,18 +75,41 @@ static int filter_frame(AVFilterLink *inlink, AVFrame 
>> *samples)
>>    int nb_channels = samples->ch_layout.nb_channels;
>>    int nb_planes   = nb_channels;
>>    int plane, i;
>> -    int16_t *pcm;
>> +    int planar = 0;
>> 
>> -    if (!av_sample_fmt_is_planar(samples->format)) {
>> -        nb_samples *= nb_channels;
>> +    planar = av_sample_fmt_is_planar(samples->format);
>> +    if (!planar)
>>        nb_planes = 1;
>> +    if (vd->is_float) {
>> +        float *audio_data;
>> +        for (plane = 0; plane < nb_planes; plane++) {
>> +            audio_data = (float *)samples->extended_data[plane];
>> +            for (i = 0; i < nb_samples; i++) {
>> +                if (planar) {
>> +                    update_float_stats(vd, &audio_data[i]);
>> +                } else {
>> +                    for (int j = 0; j < nb_channels; j++)
>> +                        update_float_stats(vd, &audio_data[i * nb_channels 
>> + j]);
>> +                }
>> +            }
>> +        }
>> +    } else {
>> +        int16_t *pcm;
>> +        for (plane = 0; plane < nb_planes; plane++) {
>> +            pcm = (int16_t *)samples->extended_data[plane];
>> +            for (i = 0; i < nb_samples; i++) {
>> +                if (planar) {
>> +                    vd->histogram[pcm[i] + 0x8000]++;
>> +                    vd->nb_samples++;
>> +                } else {
>> +                    for (int j = 0; j < nb_channels; j++) {
>> +                        vd->histogram[pcm[i * nb_channels + j] + 0x8000]++;
>> +                        vd->nb_samples++;
>> +                    }
>> +                }
>> +            }
>> +        }
> 
> Can't we pick the correct implementation (planar/packed and float/int) once 
> and for all whilst configuring the filter?

Actually sounds good, I am going to try.
Thanks for the feedback!

> 
>>    }
>> -    for (plane = 0; plane < nb_planes; plane++) {
>> -        pcm = (int16_t *)samples->extended_data[plane];
>> -        for (i = 0; i < nb_samples; i++)
>> -            vd->histogram[pcm[i] + 0x8000]++;
>> -    }
>> -
>>    return ff_filter_frame(inlink->dst->outputs[0], samples);
>> }
>> 
>> @@ -73,6 +120,20 @@ static void print_stats(AVFilterContext *ctx)
>>    uint64_t nb_samples = 0, power = 0, nb_samples_shift = 0, sum = 0;
>>    uint64_t histdb[MAX_DB + 1] = { 0 };
>> 
>> +    if (!vd->nb_samples)
>> +        return;
>> +    if (vd->is_float) {
>> +        av_log(ctx, AV_LOG_INFO, "n_samples: %" PRId64 "\n", 
>> vd->nb_samples);
>> +        av_log(ctx, AV_LOG_INFO, "mean_volume: %.1f dB\n", -logdb(vd->sum2 
>> / vd->nb_samples, AV_SAMPLE_FMT_FLT));
>> +        av_log(ctx, AV_LOG_INFO, "max_volume: %.1f dB\n", 
>> -2.0*logdb(vd->max, AV_SAMPLE_FMT_FLT));
>> +        for (i = 0; i < HISTOGRAM_SIZE_FLT && !vd->histogram[i]; i++);
>> +        for (; i >= 0 && sum < vd->nb_samples / 1000; i++) {
>> +            if (!vd->histogram[i])
>> +                continue;
>> +            av_log(ctx, AV_LOG_INFO, "histogram_%ddb: %" PRId64 "\n", 
>> MAX_DB_FLT - i, vd->histogram[i]);
>> +            sum += vd->histogram[i];
>> +        }
>> +    } else {
>>    for (i = 0; i < 0x10000; i++)
>>        nb_samples += vd->histogram[i];
>>    av_log(ctx, AV_LOG_INFO, "n_samples: %"PRId64"\n", nb_samples);
>> @@ -92,26 +153,61 @@ static void print_stats(AVFilterContext *ctx)
>>        return;
>>    power = (power + nb_samples_shift / 2) / nb_samples_shift;
>>    av_assert0(power <= 0x8000 * 0x8000);
>> -    av_log(ctx, AV_LOG_INFO, "mean_volume: %.1f dB\n", -logdb(power));
>> +    av_log(ctx, AV_LOG_INFO, "mean_volume: %.1f dB\n", 
>> -logdb((double)power, AV_SAMPLE_FMT_S16));
>> 
>>    max_volume = 0x8000;
>>    while (max_volume > 0 && !vd->histogram[0x8000 + max_volume] &&
>>                             !vd->histogram[0x8000 - max_volume])
>>        max_volume--;
>> -    av_log(ctx, AV_LOG_INFO, "max_volume: %.1f dB\n", -logdb(max_volume * 
>> max_volume));
>> +    av_log(ctx, AV_LOG_INFO, "max_volume: %.1f dB\n", 
>> -logdb((double)(max_volume * max_volume), AV_SAMPLE_FMT_S16));
>> 
>>    for (i = 0; i < 0x10000; i++)
>> -        histdb[(int)logdb((i - 0x8000) * (i - 0x8000))] += vd->histogram[i];
>> +        histdb[(int)logdb((double)(i - 0x8000) * (i - 0x8000), 
>> AV_SAMPLE_FMT_S16)] += vd->histogram[i];
>>    for (i = 0; i <= MAX_DB && !histdb[i]; i++);
>>    for (; i <= MAX_DB && sum < nb_samples / 1000; i++) {
>> -        av_log(ctx, AV_LOG_INFO, "histogram_%ddb: %"PRId64"\n", i, 
>> histdb[i]);
>> +        av_log(ctx, AV_LOG_INFO, "histogram_%ddb: %"PRId64"\n", -i, 
>> histdb[i]);
>>        sum += histdb[i];
>>    }
>> +    }
>> +}
>> +
>> +static int config_output(AVFilterLink *outlink)
>> +{
>> +    AVFilterContext *ctx = outlink->src;
>> +    VolDetectContext *vd = ctx->priv;
>> +    size_t histogram_size;
>> +
>> +    vd->is_float = outlink->format == AV_SAMPLE_FMT_FLT ||
>> +                   outlink->format == AV_SAMPLE_FMT_FLTP;
>> +
>> +    if (!vd->is_float) {
>> +        /*
>> +        * Number of samples at each PCM value.
>> +        * Only used for integer formats.
>> +        * For 16 bit signed PCM there are 65536.
>> +        * histogram[0x8000 + i] is the number of samples at value i.
>> +        * The extra element is there for symmetry.
>> +        */
>> +        histogram_size = HISTOGRAM_SIZE + 1;
>> +    } else {
>> +        /*
>> +        * The histogram is used to store the number of samples at each dB
>> +        * instead of the number of samples at each PCM value.
>> +        */
>> +        histogram_size = HISTOGRAM_SIZE_FLT + 1;
>> +    }
>> +    vd->histogram = av_calloc(histogram_size, sizeof(uint64_t));
>> +    if (!vd->histogram)
>> +        return AVERROR(ENOMEM);
>> +    return 0;
>> }
>> 
>> static av_cold void uninit(AVFilterContext *ctx)
>> {
>> +    VolDetectContext *vd = ctx->priv;
>>    print_stats(ctx);
>> +    if (vd->histogram)
>> +        av_freep(&vd->histogram);
>> }
>> 
>> static const AVFilterPad volumedetect_inputs[] = {
>> @@ -122,6 +218,14 @@ static const AVFilterPad volumedetect_inputs[] = {
>>    },
>> };
>> 
>> +static const AVFilterPad volumedetect_outputs[] = {
>> +    {
>> +        .name         = "default",
>> +        .type         = AVMEDIA_TYPE_AUDIO,
>> +        .config_props = config_output,
>> +    },
>> +};
>> +
>> const AVFilter ff_af_volumedetect = {
>>    .name          = "volumedetect",
>>    .description   = NULL_IF_CONFIG_SMALL("Detect audio volume."),
>> @@ -129,6 +233,9 @@ const AVFilter ff_af_volumedetect = {
>>    .uninit        = uninit,
>>    .flags         = AVFILTER_FLAG_METADATA_ONLY,
>>    FILTER_INPUTS(volumedetect_inputs),
>> -    FILTER_OUTPUTS(ff_audio_default_filterpad),
>> -    FILTER_SAMPLEFMTS(AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16P),
>> +    FILTER_OUTPUTS(volumedetect_outputs),
>> +    FILTER_SAMPLEFMTS(AV_SAMPLE_FMT_S16,
>> +                      AV_SAMPLE_FMT_S16P,
>> +                      AV_SAMPLE_FMT_FLT,
>> +                      AV_SAMPLE_FMT_FLTP),
>> };
> _______________________________________________
> ffmpeg-devel mailing list
> [email protected] <mailto:[email protected]>
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> 
> To unsubscribe, visit link above, or email
> [email protected] <mailto:[email protected]> with 
> subject "unsubscribe".

_______________________________________________
ffmpeg-devel mailing list
[email protected]
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
[email protected] with subject "unsubscribe".

Re: [FFmpeg-devel] [PATCH 2/3] avfilter/af_volumedetect.c: Add 32bit float audio support

Reply via email to