Hello FFmpeg Team,
I am submitting the completed work from my Google Summer of Code project,
which focuses on updating the SiTi filter for ITU-T P.910 07/22 compliance
and adding HDR support.
Attached is the patch for your review. I appreciate your feedback and
suggestions.
From e33fa1928164d986b5b62c02f00fab788a1f2f3b Mon Sep 17 00:00:00 2001
From: PoorvaGaikar <2003gaikarpoorva.com>
Date: Wed, 17 Jul 2024 23:36:56 +0530
Subject: [PATCH] libavfilter/siti: Update SITI filter to align with ITU-T
P.910 07/22 and add HDR support
---
libavfilter/vf_siti.c | 217 ++++++++++++++++++++++++++++++++++++++++--
1 file changed, 208 insertions(+), 9 deletions(-)
diff --git a/libavfilter/vf_siti.c b/libavfilter/vf_siti.c
index 722e7cecc7..2bbcaa0afd 100644
--- a/libavfilter/vf_siti.c
+++ b/libavfilter/vf_siti.c
@@ -47,6 +47,19 @@ static const int Y_FILTER[9] = {
-1, -2, -1
};
+static const float PU21_MATRICES[4][7] = {
+ // Reference: "PU21: A novel perceptually uniform encoding for adapting existing quality metrics for HDR"
+ // Rafał K. Mantiuk and M. Azimi, Picture Coding Symposium 2021
+ // https://github.com/gfxdisp/pu21
+ {1.070275272f, 0.4088273932f, 0.153224308f, 0.2520326168f, 1.063512885f, 1.14115047f, 521.4527484f}, // BANDING
+ {0.353487901f, 0.3734658629f, 8.277049286e-05f, 0.9062562627f, 0.09150303166f, 0.9099517204f, 596.3148142f}, // BANDING_GLARE
+ {1.043882782f, 0.6459495343f, 0.3194584211f, 0.374025247f, 1.114783422f, 1.095360363f, 384.9217577f}, // PEAKS
+ {816.885024f, 1479.463946f, 0.001253215609f, 0.9329636822f, 0.06746643971f, 1.573435413f, 419.6006374f} // PEAKS_GLARE
+};
+
+static const float PU21_MIN_VALUES[4] = {-1.5580e-07f, 5.4705e-10f, 1.3674e-07f, -9.7360e-08f};
+static const float PU21_MAX_VALUES[4] = {520.4673f, 595.3939f, 380.9853f, 407.5066f};
+
typedef struct SiTiContext {
const AVClass *class;
int pixel_depth;
@@ -63,8 +76,45 @@ typedef struct SiTiContext {
float *motion_matrix;
int full_range;
int print_summary;
+ int hdr_mode;
+ int bit_depth;
+ int color_range;
+ int eotf_function;
+ int calculation_domain;
+ float l_max;
+ float l_min;
+ float gamma;
+ int pu21_mode;
} SiTiContext;
+enum HdrMode {
+ SDR,
+ HDR10,
+ HLG
+};
+
+enum ColorRange {
+ LIMITED,
+ FULL
+};
+
+enum EotfFunction {
+ BT1886,
+ INV_SRGB
+};
+
+enum CalculationDomain {
+ PQ,
+ PU21
+};
+
+enum Pu21Mode {
+ BANDING,
+ BANDING_GLARE,
+ PEAKS,
+ PEAKS_GLARE
+};
+
static const enum AVPixelFormat pix_fmts[] = {
AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P,
AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ422P,
@@ -78,6 +128,15 @@ static av_cold int init(AVFilterContext *ctx)
SiTiContext *s = ctx->priv;
s->max_si = 0;
s->max_ti = 0;
+ s->hdr_mode = SDR;
+ s->bit_depth = 8;
+ s->color_range = LIMITED;
+ s->eotf_function = BT1886;
+ s->calculation_domain = PQ;
+ s->l_max = 300.0;
+ s->l_min = 0.1;
+ s->gamma = 2.4;
+ s->pu21_mode = BANDING;
return 0;
}
@@ -166,6 +225,105 @@ static uint16_t convert_full_range(int factor, uint16_t y)
return (full_upper * limit_y / limit_upper);
}
+// EOTF for BT.1886
+static float eotf_1886(float x, float gamma, float l_min, float l_max)
+{
+ // Reference: ITU-R BT.1886, Annex 1
+ // https://www.itu.int/dms_pubrec/itu-r/rec/bt/R-REC-BT.1886-0-201103-I!!PDF-E.pdf
+ const float l_max_gamma = powf(l_max, 1.0f / gamma);
+ const float l_min_gamma = powf(l_min, 1.0f / gamma);
+ const float l_diff_gamma = l_max_gamma - l_min_gamma;
+ const float a = powf(l_diff_gamma, gamma);
+ const float b = l_min_gamma / l_diff_gamma;
+ const float adjusted_x = fmaxf(x + b, 0.0f);
+
+ return a * powf(adjusted_x, gamma);
+}
+
+static float eotf_inv_srgb(float x)
+{
+ // Inverse sRGB EOTF (Electro-Optical Transfer Function) according to IEC 61966-2-1:1999
+ // Section G.2 (Encoding transformation)
+ // Reference: https://cdn.standards.iteh.ai/samples/10795/ae461684569b40bbbb2d9a22b1047f05/IEC-61966-2-1-1999-AMD1-2003.pdf
+ return (x <= 0.04045f) ? x / 12.92f : powf((x + 0.055f) / 1.055f, 2.4f);
+}
+
+static float apply_display_model(SiTiContext *s, float x)
+{
+ // Apply either BT.1886 or inverse sRGB EOTF based on the context
+ if (s->eotf_function == BT1886) {
+ // BT.1886 EOTF
+ // Reference: ITU-R BT.1886-0, Annex 1
+ // https://www.itu.int/dms_pubrec/itu-r/rec/bt/R-REC-BT.1886-0-201103-I!!PDF-E.pdf
+ return eotf_1886(x, s->gamma, 0.0f, 1.0f);
+ } else {
+ // Inverse sRGB EOTF
+ return eotf_inv_srgb(x);
+ }
+}
+
+static float oetf_pq(float x)
+{
+ // PQ (Perceptual Quantizer) OETF according to ITU-R BT.2100-2
+ // Reference: https://www.itu.int/dms_pubrec/itu-r/rec/bt/R-REC-BT.2100-2-201807-I!!PDF-E.pdf
+ // See page 5, Table 4
+
+ const float m1 = 0.1593017578125f;
+ const float m2 = 78.84375f;
+ const float c1 = 0.8359375f;
+ const float c2 = 18.8515625f;
+ const float c3 = 18.6875f;
+
+ float y = powf(x / 10000.0f, m1);
+ return powf((c1 + c2 * y) / (1.0f + c3 * y), m2);
+}
+
+static float oetf_pu21(float x, int mode);
+
+static float oetf_pu21_wrapper(float x) {
+ int mode = 0; // Set the appropriate mode or pass it as a parameter
+ return oetf_pu21(x, mode);
+}
+
+static float oetf_pu21(float x, int mode)
+{
+ // Reference: "PU21: A novel perceptually uniform encoding for adapting existing quality metrics for HDR"
+ // Rafał K. Mantiuk and M. Azimi, Picture Coding Symposium 2021
+ // https://github.com/gfxdisp/pu21
+ // Validate the mode
+ const float *p;
+ float p_min;
+ float p_max;
+ float numerator;
+ float denominator;
+ float base;
+ float exponentiated;
+ float scaled;
+
+ if (mode < 0 || mode > 3) {
+ return x; // Invalid mode, return input unchanged
+ }
+
+ // Get the parameters and min/max values for the given mode
+
+
+ p = PU21_MATRICES[mode];
+ p_min = PU21_MIN_VALUES[mode];
+ p_max = PU21_MAX_VALUES[mode];
+
+ // Declare all other variables at the beginning of the block
+ numerator = p[0] + p[1] * powf(x, p[3]);
+ denominator = 1.0f + p[2] * powf(x, p[3]);
+ base = numerator / denominator;
+
+ // Calculate the exponentiated and scaled value
+ exponentiated = powf(base, p[4]);
+ scaled = p[6] * (exponentiated - p[5]);
+
+ // Normalize the result to the range [0, 1]
+ return (scaled - p_min) / (p_max - p_min);
+}
+
// Applies sobel convolution
static void convolve_sobel(SiTiContext *s, const uint8_t *src, float *dst, int linesize)
{
@@ -186,20 +344,20 @@ static void convolve_sobel(SiTiContext *s, const uint8_t *src, float *dst, int l
#define CONVOLVE(bps) \
{ \
uint##bps##_t *vsrc = (uint##bps##_t*)src; \
- for (int j = 1; j < s->height - 1; j++) { \
- for (int i = 1; i < s->width - 1; i++) { \
+ for (int img_j = 1; img_j < s->height - 1; img_j++) { \
+ for (int img_i = 1; img_i < s->width - 1; img_i++) { \
x_conv_sum = 0.0; \
y_conv_sum = 0.0; \
for (int k = 0; k < filter_size; k++) { \
ki = k % filter_width - 1; \
kj = floor(k / filter_width) - 1; \
- index = (j + kj) * stride + (i + ki); \
+ index = (img_j + kj) * stride + (img_i + ki); \
data = s->full_range ? vsrc[index] : convert_full_range(factor, vsrc[index]); \
x_conv_sum += data * X_FILTER[k]; \
y_conv_sum += data * Y_FILTER[k]; \
} \
gradient = sqrt(x_conv_sum * x_conv_sum + y_conv_sum * y_conv_sum); \
- dst[(j - 1) * (s->width - 2) + (i - 1)] = gradient; \
+ dst[(img_j - 1) * (s->width - 2) + (img_i - 1)] = gradient; \
} \
} \
}
@@ -254,15 +412,16 @@ static float std_deviation(float *img_metrics, int width, int height)
int size = height * width;
double mean = 0.0;
double sqr_diff = 0;
+ int j, i;
- for (int j = 0; j < height; j++)
- for (int i = 0; i < width; i++)
+ for (j = 0; j < height; j++)
+ for (i = 0; i < width; i++)
mean += img_metrics[j * width + i];
mean /= size;
- for (int j = 0; j < height; j++) {
- for (int i = 0; i < width; i++) {
+ for (j = 0; j < height; j++) {
+ for (i = 0; i < width; i++) {
float mean_diff = img_metrics[j * width + i] - mean;
sqr_diff += (mean_diff * mean_diff);
}
@@ -285,15 +444,46 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
float si;
float ti;
- s->full_range = is_full_range(frame);
+ s->full_range = is_full_range(frame); // Determine if full range is needed
s->nb_frames++;
+ // Apply EOTF and OETF transformations
+ for (int idx = 0; idx < s->width * s->height; idx++) {
+ float pixel = ((uint8_t*)frame->data[0])[idx] / 255.0f;
+ float eotf = apply_display_model(s, pixel);
+ float oetf = oetf_pq(eotf * s->l_max);
+ ((uint8_t*)frame->data[0])[idx] = (uint8_t)(oetf * 255.0f);
+ }
+
// Calculate si and ti
convolve_sobel(s, frame->data[0], s->gradient_matrix, frame->linesize[0]);
calculate_motion(s, frame->data[0], s->motion_matrix, frame->linesize[0]);
si = std_deviation(s->gradient_matrix, s->width - 2, s->height - 2);
ti = std_deviation(s->motion_matrix, s->width, s->height);
+ // Apply HDR transformations if necessary
+ if (s->hdr_mode != SDR) {
+ float (*oetf_func)(float);
+ if (s->calculation_domain == PQ) {
+ oetf_func = oetf_pq;
+ } else {
+ oetf_func = oetf_pu21_wrapper;
+ }
+
+ for (int i = 0; i < (s->width - 2) * (s->height - 2); i++) {
+ s->gradient_matrix[i] = oetf_func(apply_display_model(s, s->gradient_matrix[i] / 255.0f)) * 255.0f;
+ }
+ for (int i = 0; i < s->width * s->height; i++) {
+ s->motion_matrix[i] = oetf_func(apply_display_model(s, s->motion_matrix[i] / 255.0f)) * 255.0f;
+ }
+
+ si = std_deviation(s->gradient_matrix, s->width - 2, s->height - 2);
+ ti = std_deviation(s->motion_matrix, s->width, s->height);
+ }
+
+ // Print SI and TI values for each frame
+ av_log(ctx, AV_LOG_INFO, "Frame %"PRId64" - SI: %.6f, TI: %.6f\n", s->nb_frames, si, ti);
+
// Calculate statistics
s->max_si = fmaxf(si, s->max_si);
s->max_ti = fmaxf(ti, s->max_ti);
@@ -314,6 +504,15 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
static const AVOption siti_options[] = {
{ "print_summary", "Print summary showing average values", OFFSET(print_summary), AV_OPT_TYPE_BOOL, { .i64=0 }, 0, 1, FLAGS },
+ { "hdr_mode", "HDR mode (0: SDR, 1: HDR10, 2: HLG)", OFFSET(hdr_mode), AV_OPT_TYPE_INT, {.i64=0}, 0, 2, FLAGS },
+ { "bit_depth", "Bit depth (8, 10, or 12)", OFFSET(bit_depth), AV_OPT_TYPE_INT, {.i64=8}, 8, 12, FLAGS },
+ { "color_range", "Color range (0: limited, 1: full)", OFFSET(color_range), AV_OPT_TYPE_INT, {.i64=0}, 0, 1, FLAGS },
+ { "eotf_function", "EOTF function (0: BT.1886, 1: Inverse sRGB)", OFFSET(eotf_function), AV_OPT_TYPE_INT, {.i64=0}, 0, 1, FLAGS },
+ { "calculation_domain", "Calculation domain (0: PQ, 1: PU21)", OFFSET(calculation_domain), AV_OPT_TYPE_INT, {.i64=0}, 0, 1, FLAGS },
+ { "l_max", "Maximum luminance", OFFSET(l_max), AV_OPT_TYPE_FLOAT, {.dbl=300.0}, 0, 10000, FLAGS },
+ { "l_min", "Minimum luminance", OFFSET(l_min), AV_OPT_TYPE_FLOAT, {.dbl=0.1}, 0, 1, FLAGS },
+ { "gamma", "Gamma value for BT.1886", OFFSET(gamma), AV_OPT_TYPE_FLOAT, {.dbl=2.4}, 1, 3, FLAGS },
+ { "pu21_mode", "PU21 mode (0: BANDING, 1: BANDING_GLARE, 2: PEAKS, 3: PEAKS_GLARE)", OFFSET(pu21_mode), AV_OPT_TYPE_INT, {.i64=0}, 0, 3, FLAGS },
{ NULL }
};
--
2.43.0
_______________________________________________
ffmpeg-devel mailing list
[email protected]
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
[email protected] with subject "unsubscribe".