PR #21202 opened by xiatao URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/21202 Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/21202.patch
Signed-off-by: Xia Tao <[email protected]> Rewrite branchless bit-twiddling into wasm-friendly logic: use comparisons to produce 0/1, then turn that into 0/-1 masks (or a tiny explicit branch), reducing tricky shifts and long dependency chains. Fewer memory accesses: cache c->range / c->low and CABAC table base pointers in locals to avoid repeated loads/stores. Shorter critical path: keep the decode step as a straight-line sequence (threshold compare → update low/range → update state) that JITs optimize well. Note: this optimization relies purely on standard scalar instructions and makes no use of WebAssembly SIMD128 features; the "_simd128" suffix in the benchmark output below only reflects the AV_CPU_FLAG_SIMD128 CPU flag that the checkasm test uses to select the new functions. Benchmark: wasmtime tests/checkasm/checkasm --bench=cab* benchmarking with native FFmpeg timers nop: 43.9 checkasm: using random seed 786092358 checkasm: bench runs 1024 (1 << 10) SIMD128: - cabac.cabac [OK] - hevc_idct.idct [OK] - hevc_sao.sao_band [OK] - hevc_sao.sao_edge [OK] checkasm: all 21 tests passed cabac.bypass_c: 7.2 ( 1.00x) cabac.bypass_simd128: 3.0 ( 2.37x) cabac.bypass_sign_c: 3.6 ( 1.00x) cabac.bypass_sign_simd128: 2.8 ( 1.28x) cabac.get_c: 9.2 ( 1.00x) cabac.get_simd128: 7.8 ( 1.17x) From bf014423ed67f56827f82f100154d2b93421fbc0 Mon Sep 17 00:00:00 2001 From: Xia Tao <[email protected]> Date: Mon, 15 Dec 2025 16:25:47 +0800 Subject: [PATCH] avcodec/wasm: WebAssembly-friendly scalar CABAC fast path optimization Signed-off-by: Xia Tao <[email protected]> --- libavcodec/hevc/cabac.c | 7 ++ libavcodec/wasm/Makefile | 1 + libavcodec/wasm/cabac.c | 130 ++++++++++++++++++++++++ libavcodec/wasm/cabac.h | 32 ++++++ tests/checkasm/Makefile | 1 + tests/checkasm/cabac.c | 201 ++++++++++++++++++++++++++++++++++++++ tests/checkasm/checkasm.c | 3 + tests/checkasm/checkasm.h | 1 + 8 files changed, 376 insertions(+) create mode 100644 libavcodec/wasm/Makefile create mode 100644 libavcodec/wasm/cabac.c create mode 100644 libavcodec/wasm/cabac.h create mode 100644 tests/checkasm/cabac.c diff --git
a/libavcodec/hevc/cabac.c b/libavcodec/hevc/cabac.c index 55d5741f87..c0fe23ec2e 100644 --- a/libavcodec/hevc/cabac.c +++ b/libavcodec/hevc/cabac.c @@ -29,6 +29,13 @@ #include "hevc.h" #include "hevcdec.h" +#if ARCH_WASM +#include "libavcodec/wasm/cabac.h" +#define get_cabac ff_get_cabac_wasm +#define get_cabac_bypass ff_get_cabac_bypass_wasm +#define get_cabac_bypass_sign ff_get_cabac_bypass_sign_wasm +#endif + #define CABAC_MAX_BIN 31 // ELEM(NAME, NUM_BINS) diff --git a/libavcodec/wasm/Makefile b/libavcodec/wasm/Makefile new file mode 100644 index 0000000000..e0a5066197 --- /dev/null +++ b/libavcodec/wasm/Makefile @@ -0,0 +1 @@ +OBJS-$(CONFIG_CABAC) += wasm/cabac.o diff --git a/libavcodec/wasm/cabac.c b/libavcodec/wasm/cabac.c new file mode 100644 index 0000000000..f5ffbb53ed --- /dev/null +++ b/libavcodec/wasm/cabac.c @@ -0,0 +1,130 @@ +/* + * CABAC helpers for WebAssembly + * + * Copyright (c) 2025 Xia Tao + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+
+#include "libavcodec/wasm/cabac.h"
+#include "libavcodec/cabac_functions.h"
+#include <stdint.h>
+#include "libavutil/attributes.h"
+#include "libavutil/avassert.h"
+#include "libavutil/intreadwrite.h"
+
+/**
+ * Decode one context-coded bin.
+ *
+ * The LPS sub-range is looked up from the current range and the context
+ * state. The MPS/LPS decision is made by comparing the low value against
+ * the scaled MPS range and is turned into a 0/-1 mask, so that low, range
+ * and the state-table index are selected without branching.
+ *
+ * @param c     decoder context; c->range and c->low are updated, and
+ *              refill2() is invoked when the bits of low selected by
+ *              CABAC_MASK are all zero
+ * @param state context state byte, updated in place
+ * @return the decoded bin (0 or 1)
+ */
+static av_always_inline int get_cabac_core_wasm(CABACContext *c, uint8_t *state)
+{
+    const uint8_t *lps_range  = ff_h264_cabac_tables + H264_LPS_RANGE_OFFSET;
+    const uint8_t *mlps_state = ff_h264_cabac_tables + H264_MLPS_STATE_OFFSET;
+    const uint8_t *norm_shift = ff_h264_cabac_tables + H264_NORM_SHIFT_OFFSET;
+    int s         = *state;
+    int range     = c->range;
+    int low       = c->low;
+    int range_lps = lps_range[2 * (range & 0xC0) + s];
+    int range_mps = range - range_lps;
+    int thresh    = range_mps << (CABAC_BITS + 1);
+    int lps_mask  = -(low > thresh); /* 0 on the MPS path, -1 on the LPS path */
+    int shift;
+    int bit;
+
+    low  -= thresh & lps_mask;
+    range = range_mps + ((range_lps - range_mps) & lps_mask);
+
+    /* On the LPS path s becomes ~s (negative); the table pointer is biased
+     * by 128 so that this negative index is valid. */
+    s     ^= lps_mask;
+    *state = (mlps_state + 128)[s];
+    bit    = s & 1;
+
+    /* Renormalize range and low by the table-provided shift count. */
+    shift   = norm_shift[range];
+    range <<= shift;
+    low   <<= shift;
+
+    /* Write back before refilling: refill2() only sees the context. */
+    c->range = range;
+    c->low   = low;
+
+    if (!(low & CABAC_MASK))
+        refill2(c);
+
+    return bit;
+}
+
+/**
+ * Out-of-line entry point for context-coded bin decoding.
+ *
+ * @param c     decoder context, updated by the call
+ * @param state context state byte, updated in place
+ * @return the decoded bin (0 or 1)
+ */
+int ff_get_cabac_wasm(CABACContext *c, uint8_t *state)
+{
+    return get_cabac_core_wasm(c, state);
+}
+
+/**
+ * Decode one bypass bin.
+ *
+ * c->low is doubled, refill() is called when the bits of c->low selected by
+ * CABAC_MASK are all zero, and the bin is obtained by comparing c->low
+ * against the scaled range. The scaled range is subtracted from c->low when
+ * the bin is 1.
+ *
+ * @param c decoder context; c->low is updated, c->range is only read here
+ * @return the decoded bin (0 or 1)
+ */
+static av_always_inline int get_cabac_bypass_core_wasm(CABACContext *c)
+{
+    int range_shifted;
+    int low;
+    int bit;
+
+    c->low += c->low;
+
+    if (!(c->low & CABAC_MASK))
+        refill(c);
+
+    range_shifted = c->range << (CABAC_BITS + 1);
+    low           = c->low;
+    bit           = low >= range_shifted;
+
+    if (bit)
+        low -= range_shifted;
+
+    c->low = low;
+    return bit;
+}
+
+/**
+ * Out-of-line entry point for bypass bin decoding.
+ *
+ * @param c decoder context, updated by the call
+ * @return the decoded bin (0 or 1)
+ */
+int ff_get_cabac_bypass_wasm(CABACContext *c)
+{
+    return get_cabac_bypass_core_wasm(c);
+}
+
+/**
+ * Decode one bypass bin and apply it as a sign to val.
+ *
+ * The bin is decoded exactly as in get_cabac_bypass_core_wasm(); only the
+ * mapping of the result differs.
+ *
+ * @param c   decoder context, updated by the call
+ * @param val magnitude to be signed
+ * @return val when the decoded bin is 1, -val when it is 0
+ */
+static av_always_inline int get_cabac_bypass_sign_core_wasm(CABACContext *c, int val)
+{
+    return get_cabac_bypass_core_wasm(c) ? val : -val;
+}
+
+/**
+ * Out-of-line entry point for bypass sign decoding.
+ *
+ * @param c   decoder context, updated by the call
+ * @param val magnitude to be signed
+ * @return val or -val, depending on the decoded bypass bin
+ */
+int ff_get_cabac_bypass_sign_wasm(CABACContext *c, int val)
+{
+    return get_cabac_bypass_sign_core_wasm(c, val);
+}
diff --git a/libavcodec/wasm/cabac.h b/libavcodec/wasm/cabac.h
new file mode 100644
index 0000000000..73935850f2
--- /dev/null
+++ b/libavcodec/wasm/cabac.h
@@ -0,0 +1,32 @@
+/*
+ * CABAC helpers for WebAssembly
+ *
+ * Copyright (c) 2025 Xia Tao
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_WASM_CABAC_H +#define AVCODEC_WASM_CABAC_H + +#include "libavcodec/cabac.h" + +int ff_get_cabac_wasm(CABACContext *c, uint8_t *state); +int ff_get_cabac_bypass_wasm(CABACContext *c); +int ff_get_cabac_bypass_sign_wasm(CABACContext *c, int val); + +#endif /* AVCODEC_WASM_CABAC_H */ diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile index 9e675ce189..2874657080 100644 --- a/tests/checkasm/Makefile +++ b/tests/checkasm/Makefile @@ -12,6 +12,7 @@ AVCODECOBJS-$(CONFIG_H264CHROMA) += h264chroma.o AVCODECOBJS-$(CONFIG_H264DSP) += h264dsp.o AVCODECOBJS-$(CONFIG_H264PRED) += h264pred.o AVCODECOBJS-$(CONFIG_H264QPEL) += h264qpel.o +AVCODECOBJS-$(CONFIG_CABAC) += cabac.o AVCODECOBJS-$(CONFIG_HPELDSP) += hpeldsp.o AVCODECOBJS-$(CONFIG_IDCTDSP) += idctdsp.o AVCODECOBJS-$(CONFIG_LLAUDDSP) += llauddsp.o diff --git a/tests/checkasm/cabac.c b/tests/checkasm/cabac.c new file mode 100644 index 0000000000..1cd45061fb --- /dev/null +++ b/tests/checkasm/cabac.c @@ -0,0 +1,201 @@ +/* + * Copyright (c) 2025 Xia Tao + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <string.h>
+
+#include "libavcodec/cabac.h"
+#include "libavcodec/cabac_functions.h"
+
+#include "checkasm.h"
+
+#if ARCH_WASM
+
+#include "libavcodec/wasm/cabac.h"
+
+#define CABAC_BUF_SIZE 8192
+#define CABAC_STATE_SIZE 64
+#define CABAC_BIN_COUNT 48
+
+/* Out-of-line wrappers so that the C reference versions can be passed
+ * around as function pointers. */
+static int get_cabac_c(CABACContext *c, uint8_t *state)
+{
+    return get_cabac_inline(c, state);
+}
+
+static int get_cabac_bypass_c(CABACContext *c)
+{
+    return get_cabac_bypass(c);
+}
+
+static int get_cabac_bypass_sign_c(CABACContext *c, int val)
+{
+    return get_cabac_bypass_sign(c, val);
+}
+
+/* Fill buf[0..size-1] with random bytes to serve as the coded bitstream. */
+static void fill_random(uint8_t *buf, int size)
+{
+    for (int i = 0; i < size; i++)
+        buf[i] = rnd();
+}
+
+/*
+ * Fill dst[0..size-1] with random context states.
+ *
+ * The states are limited to 0..127: the decoders use the state (or its
+ * bitwise complement) as an index relative to a table pointer biased by 128,
+ * so larger values would index outside the state-transition table in both
+ * the reference and the tested implementation.
+ */
+static void init_states(uint8_t *dst, int size)
+{
+    for (int i = 0; i < size; i++)
+        dst[i] = rnd() & 0x7F;
+}
+
+/* Initialize c0 on buf and copy it to c1, so that the reference and the
+ * tested function start from identical decoder contexts. */
+static void setup_contexts(CABACContext *c0, CABACContext *c1, uint8_t *buf, int buf_size)
+{
+    if (ff_init_cabac_decoder(c0, buf, buf_size) < 0)
+        fail();
+    *c1 = *c0;
+}
+
+/*
+ * Compare context-coded bin decoding against the C reference.
+ *
+ * For CABAC_BIN_COUNT bins the return value, the updated state byte and the
+ * low/range/bytestream fields of both contexts must match.
+ */
+static void check_get_cabac(int use_wasm)
+{
+    uint8_t buf[CABAC_BUF_SIZE];
+    uint8_t state_ref[CABAC_STATE_SIZE];
+    uint8_t state_new[CABAC_STATE_SIZE];
+    CABACContext c_ref, c_new;
+    declare_func(int, CABACContext *, uint8_t *);
+    func_type *func = use_wasm ? ff_get_cabac_wasm : get_cabac_c;
+
+    fill_random(buf, sizeof(buf));
+    init_states(state_ref, CABAC_STATE_SIZE);
+    memcpy(state_new, state_ref, sizeof(state_ref));
+    setup_contexts(&c_ref, &c_new, buf, sizeof(buf));
+
+    if (check_func(func, "cabac.get")) {
+        for (int i = 0; i < CABAC_BIN_COUNT; i++) {
+            int idx = i % CABAC_STATE_SIZE;
+            int ret_ref = call_ref(&c_ref, &state_ref[idx]);
+            int ret_new = call_new(&c_new, &state_new[idx]);
+
+            if (ret_ref != ret_new ||
+                state_ref[idx] != state_new[idx] ||
+                c_ref.low != c_new.low ||
+                c_ref.range != c_new.range ||
+                c_ref.bytestream != c_new.bytestream)
+                fail();
+        }
+
+        if (checkasm_bench_func()) {
+            /* Restart from a fresh context for the benchmark run. */
+            memcpy(state_new, state_ref, sizeof(state_ref));
+            setup_contexts(&c_ref, &c_new, buf, sizeof(buf));
+            bench_new(&c_new, &state_new[0]);
+        }
+    }
+}
+
+/*
+ * Compare bypass bin decoding against the C reference.
+ *
+ * For CABAC_BIN_COUNT bins the return value and the low/range/bytestream
+ * fields of both contexts must match.
+ */
+static void check_get_cabac_bypass(int use_wasm)
+{
+    uint8_t buf[CABAC_BUF_SIZE];
+    CABACContext c_ref, c_new;
+    declare_func(int, CABACContext *);
+    func_type *func = use_wasm ? ff_get_cabac_bypass_wasm : get_cabac_bypass_c;
+
+    fill_random(buf, sizeof(buf));
+    setup_contexts(&c_ref, &c_new, buf, sizeof(buf));
+
+    if (check_func(func, "cabac.bypass")) {
+        for (int i = 0; i < CABAC_BIN_COUNT; i++) {
+            int ret_ref = call_ref(&c_ref);
+            int ret_new = call_new(&c_new);
+
+            if (ret_ref != ret_new ||
+                c_ref.low != c_new.low ||
+                c_ref.range != c_new.range ||
+                c_ref.bytestream != c_new.bytestream)
+                fail();
+        }
+
+        if (checkasm_bench_func()) {
+            setup_contexts(&c_ref, &c_new, buf, sizeof(buf));
+            bench_new(&c_new);
+        }
+    }
+}
+
+/*
+ * Compare bypass sign decoding against the C reference.
+ *
+ * Each bin is decoded with a random magnitude in 1..0x8000; the return value
+ * and the low/range/bytestream fields of both contexts must match.
+ */
+static void check_get_cabac_bypass_sign(int use_wasm)
+{
+    uint8_t buf[CABAC_BUF_SIZE];
+    CABACContext c_ref, c_new;
+    declare_func(int, CABACContext *, int);
+    func_type *func = use_wasm ? ff_get_cabac_bypass_sign_wasm : get_cabac_bypass_sign_c;
+
+    fill_random(buf, sizeof(buf));
+    setup_contexts(&c_ref, &c_new, buf, sizeof(buf));
+
+    if (check_func(func, "cabac.bypass_sign")) {
+        for (int i = 0; i < CABAC_BIN_COUNT; i++) {
+            int val = (rnd() & 0x7FFF) + 1;
+            int ret_ref = call_ref(&c_ref, val);
+            int ret_new = call_new(&c_new, val);
+
+            if (ret_ref != ret_new ||
+                c_ref.low != c_new.low ||
+                c_ref.range != c_new.range ||
+                c_ref.bytestream != c_new.bytestream)
+                fail();
+        }
+
+        if (checkasm_bench_func()) {
+            int val = 1234;
+            setup_contexts(&c_ref, &c_new, buf, sizeof(buf));
+            bench_new(&c_new, val);
+        }
+    }
+}
+
+/*
+ * Test entry point.
+ *
+ * The WebAssembly functions are selected only when AV_CPU_FLAG_SIMD128 is
+ * set in the current CPU flags; otherwise the C wrappers are passed to
+ * check_func().
+ */
+void checkasm_check_cabac(void)
+{
+    int use_wasm = !!(av_get_cpu_flags() & AV_CPU_FLAG_SIMD128);
+
+    check_get_cabac(use_wasm);
+    check_get_cabac_bypass(use_wasm);
+    check_get_cabac_bypass_sign(use_wasm);
+    report("cabac");
+}
+
+#else /* !ARCH_WASM */
+
+/* Nothing to compare on other architectures; only emit the report line. */
+void checkasm_check_cabac(void)
+{
+    report("cabac");
+}
+
+#endif /* ARCH_WASM */
diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
index 268e600346..aab943ff63 100644
--- a/tests/checkasm/checkasm.c
+++ b/tests/checkasm/checkasm.c
@@ -181,6 +181,9 @@ static const struct {
     #if CONFIG_H264QPEL
         { "h264qpel", checkasm_check_h264qpel },
     #endif
+    #if CONFIG_CABAC
+        { "cabac", checkasm_check_cabac },
+    #endif
     #if CONFIG_HEVC_DECODER
         { "hevc_add_res", checkasm_check_hevc_add_res },
         { "hevc_deblock", checkasm_check_hevc_deblock },
diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h
index a54231dd0d..b17967ac1b 100644
--- a/tests/checkasm/checkasm.h
+++ b/tests/checkasm/checkasm.h
@@ -112,6 +112,7 @@ void checkasm_check_hevc_deblock(void);
 void checkasm_check_hevc_idct(void);
 void checkasm_check_hevc_pel(void);
 void checkasm_check_hevc_sao(void);
+void checkasm_check_cabac(void);
 void checkasm_check_hpeldsp(void);
 void checkasm_check_huffyuvdsp(void);
 void checkasm_check_idctdsp(void);
--
2.49.1
_______________________________________________ ffmpeg-devel mailing list -- [email protected] To unsubscribe send an email to [email protected]
