Add a RISC-V Vector (RVV) optimized implementation of rte_hash_k16_cmp_eq() to accelerate 16-byte key comparison in the hash lookup fast path; the 32- to 128-byte key comparators are built on top of it.
The implementation uses RVV vector load and compare instructions to detect mismatched bytes and reduces comparison latency on RVV-capable systems.

This patch is co-developed with gong-flying.

Signed-off-by: gong-flying <[email protected]>
Signed-off-by: P1erreCashon <[email protected]>
---
 config/riscv/meson.build   | 18 +++++++-
 lib/hash/rte_cmp_riscv.h   | 93 ++++++++++++++++++++++++++++++++++++++
 lib/hash/rte_cuckoo_hash.c |  2 +-
 lib/hash/rte_cuckoo_hash.h |  6 ++-
 4 files changed, 116 insertions(+), 3 deletions(-)
 create mode 100644 lib/hash/rte_cmp_riscv.h

diff --git a/config/riscv/meson.build b/config/riscv/meson.build
index 07d7d9da23..a844faaa7b 100644
--- a/config/riscv/meson.build
+++ b/config/riscv/meson.build
@@ -113,12 +113,28 @@ dpdk_flags = flags_common + vendor_config['flags'] + arch_config.get('flags', []
 
 # apply supported machine args
 machine_args = [] # Clear previous machine args
-foreach flag: arch_config['machine_args']
+
+# detect best ISA
+if cc.has_argument('-march=rv64gc_zve64x')
+    machine_args += ['-march=rv64gc_zve64x']
+    dpdk_conf.set('RTE_ARCH_RISCV_VEC', 1)
+    message('Using rv64gc_zve64x')
+else
+    machine_args += ['-march=rv64gc']
+    message('Using rv64gc (fallback)')
+endif
+
+# apply extra tuning flags (like -mtune)
+foreach flag: arch_config.get('machine_args', [])
+    if flag.startswith('-march')
+        continue
+    endif
     if cc.has_argument(flag)
         machine_args += flag
     endif
 endforeach
 
+
 # apply flags
 foreach flag: dpdk_flags
     if flag.length() > 0
diff --git a/lib/hash/rte_cmp_riscv.h b/lib/hash/rte_cmp_riscv.h
new file mode 100644
index 0000000000..7881d17e05
--- /dev/null
+++ b/lib/hash/rte_cmp_riscv.h
@@ -0,0 +1,93 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2015 Intel Corporation
+ */
+
+#include <riscv_vector.h>
+
+/* Functions to compare multiple of 16 byte keys (up to 128 bytes): return 0 if equal, 1 if not */
+static inline int
+rte_hash_k16_cmp_eq(const void *key1, const void *key2, size_t key_len __rte_unused)
+{
+	const uint8_t *p1 = (const uint8_t *)key1;
+	const uint8_t *p2 = (const uint8_t *)key2;
+	size_t offset = 0; /* number of key bytes already compared */
+
+	while (offset < 16) { /* repeat until all 16 bytes are covered */
+		size_t vl = __riscv_vsetvl_e8m1(16 - offset); /* bytes handled this pass */
+
+		vuint8m1_t v1 = __riscv_vle8_v_u8m1(p1 + offset, vl);
+		vuint8m1_t v2 = __riscv_vle8_v_u8m1(p2 + offset, vl);
+
+		/* mask with a bit set for every byte position that differs */
+		vbool8_t neq = __riscv_vmsne_vv_u8m1_b8(v1, v2, vl);
+
+		/* a non-negative vfirst result means some mask bit is set: keys differ */
+		if (__riscv_vfirst_m_b8(neq, vl) >= 0)
+			return 1;
+
+		offset += vl;
+	}
+
+	/* all 16 bytes matched */
+	return 0;
+}
+
+static inline int
+rte_hash_k32_cmp_eq(const void *key1, const void *key2, size_t key_len)
+{
+	return rte_hash_k16_cmp_eq(key1, key2, key_len) ||
+		rte_hash_k16_cmp_eq((const char *) key1 + 16,
+				(const char *) key2 + 16, key_len);
+}
+
+static inline int
+rte_hash_k48_cmp_eq(const void *key1, const void *key2, size_t key_len)
+{
+	return rte_hash_k16_cmp_eq(key1, key2, key_len) ||
+		rte_hash_k16_cmp_eq((const char *) key1 + 16,
+				(const char *) key2 + 16, key_len) ||
+		rte_hash_k16_cmp_eq((const char *) key1 + 32,
+				(const char *) key2 + 32, key_len);
+}
+
+static inline int
+rte_hash_k64_cmp_eq(const void *key1, const void *key2, size_t key_len)
+{
+	return rte_hash_k32_cmp_eq(key1, key2, key_len) ||
+		rte_hash_k32_cmp_eq((const char *) key1 + 32,
+				(const char *) key2 + 32, key_len);
+}
+
+static inline int
+rte_hash_k80_cmp_eq(const void *key1, const void *key2, size_t key_len)
+{
+	return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
+		rte_hash_k16_cmp_eq((const char *) key1 + 64,
+				(const char *) key2 + 64, key_len);
+}
+
+static inline int
+rte_hash_k96_cmp_eq(const void *key1, const void *key2, size_t key_len)
+{
+	return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
+		rte_hash_k32_cmp_eq((const char *) key1 + 64,
+				(const char *) key2 + 64, key_len);
+}
+
+static inline int
+rte_hash_k112_cmp_eq(const void *key1, const void *key2, size_t key_len)
+{
+	return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
+		rte_hash_k32_cmp_eq((const char *) key1 + 64,
+				(const char *) key2 + 64, key_len) ||
+		rte_hash_k16_cmp_eq((const char *) key1 + 96,
+				(const char *) key2 + 96, key_len);
+}
+
+static inline int
+rte_hash_k128_cmp_eq(const void *key1, const void *key2, size_t key_len)
+{
+	return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
+		rte_hash_k64_cmp_eq((const char *) key1 + 64,
+				(const char *) key2 + 64, key_len);
+}
diff --git a/lib/hash/rte_cuckoo_hash.c b/lib/hash/rte_cuckoo_hash.c
index 9cf94645f6..159001f2fa 100644
--- a/lib/hash/rte_cuckoo_hash.c
+++ b/lib/hash/rte_cuckoo_hash.c
@@ -357,7 +357,7 @@ rte_hash_create(const struct rte_hash_parameters *params)
 	 * If x86 architecture is used, select appropriate compare function,
 	 * which may use x86 intrinsics, otherwise use memcmp
 	 */
-#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
+#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64) || defined(RTE_ARCH_RISCV_VEC)
 	/* Select function to compare keys */
 	switch (params->key_len) {
 	case 16:
diff --git a/lib/hash/rte_cuckoo_hash.h b/lib/hash/rte_cuckoo_hash.h
index a528f1d1a0..b693abcb89 100644
--- a/lib/hash/rte_cuckoo_hash.h
+++ b/lib/hash/rte_cuckoo_hash.h
@@ -21,6 +21,10 @@
 #include "rte_cmp_arm64.h"
 #endif
 
+#if defined(RTE_ARCH_RISCV_VEC)
+#include "rte_cmp_riscv.h"
+#endif
+
 /* Macro to enable/disable run-time checking of function parameters */
 #if defined(RTE_LIBRTE_HASH_DEBUG)
 #define RETURN_IF_TRUE(cond, retval) do { \
@@ -34,7 +38,7 @@
 #include <rte_hash_crc.h>
 #include <rte_jhash.h>
 
-#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
+#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64) || defined(RTE_ARCH_RISCV_VEC)
 /*
  * All different options to select a key compare function,
  * based on the key size and custom function.
-- 
2.43.0

