steveloughran commented on code in PR #8031:
URL: https://github.com/apache/hadoop/pull/8031#discussion_r2511392823


##########
hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/util/bulk_crc32_riscv.c:
##########
@@ -16,24 +16,200 @@
  * limitations under the License.
  */
 
+#include <assert.h>
+#include <stddef.h>  // for size_t
+#include <stdio.h>
+#include <string.h>
+
+#include "bulk_crc32.h"
+#include "gcc_optimizations.h"
+
+/**
+ * Hardware-accelerated CRC32 calculation using RISC-V Zbc extension.
+ * Uses carry-less multiply instructions (clmul/clmulh) for CRC32 (zlib
+ * polynomial).
+ */
+
+typedef void (*crc_pipelined_func_t)(uint32_t *, uint32_t *, uint32_t *,
+                                     const uint8_t *, size_t, int);
+extern crc_pipelined_func_t pipelined_crc32_zlib_func;
+
+#if defined(__riscv) && (__riscv_xlen == 64)
+
+#define RV_CRC32_CONST_R3 0x01751997d0ULL
+#define RV_CRC32_CONST_R4 0x00ccaa009eULL
+#define RV_CRC32_CONST_R5 0x0163cd6124ULL
+#define RV_CRC32_MASK32 0x00000000FFFFFFFFULL
+#define RV_CRC32_POLY_TRUE_LE_FULL 0x01DB710641ULL
+#define RV_CRC32_CONST_RU 0x01F7011641ULL
+
+static inline uint64_t rv_clmul(uint64_t a, uint64_t b) {

Review Comment:
   Explain what this function does with a comment; do the same for the methods below.



##########
hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/util/bulk_crc32_riscv.c:
##########
@@ -16,24 +16,200 @@
  * limitations under the License.
  */
 
+#include <assert.h>
+#include <stddef.h>  // for size_t
+#include <stdio.h>
+#include <string.h>
+
+#include "bulk_crc32.h"
+#include "gcc_optimizations.h"
+
+/**
+ * Hardware-accelerated CRC32 calculation using RISC-V Zbc extension.
+ * Uses carry-less multiply instructions (clmul/clmulh) for CRC32 (zlib
+ * polynomial).
+ */
+
+typedef void (*crc_pipelined_func_t)(uint32_t *, uint32_t *, uint32_t *,
+                                     const uint8_t *, size_t, int);
+extern crc_pipelined_func_t pipelined_crc32_zlib_func;
+
+#if defined(__riscv) && (__riscv_xlen == 64)
+
+#define RV_CRC32_CONST_R3 0x01751997d0ULL

Review Comment:
   Explain what these constants are — assuming they're all defined in the CRC spec, say so.



##########
hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/util/bulk_crc32_riscv.c:
##########
@@ -16,24 +16,200 @@
  * limitations under the License.
  */
 
+#include <assert.h>
+#include <stddef.h>  // for size_t
+#include <stdio.h>
+#include <string.h>
+
+#include "bulk_crc32.h"
+#include "gcc_optimizations.h"
+
+/**
+ * Hardware-accelerated CRC32 calculation using RISC-V Zbc extension.
+ * Uses carry-less multiply instructions (clmul/clmulh) for CRC32 (zlib
+ * polynomial).
+ */
+
+typedef void (*crc_pipelined_func_t)(uint32_t *, uint32_t *, uint32_t *,
+                                     const uint8_t *, size_t, int);
+extern crc_pipelined_func_t pipelined_crc32_zlib_func;
+
+#if defined(__riscv) && (__riscv_xlen == 64)
+
+#define RV_CRC32_CONST_R3 0x01751997d0ULL
+#define RV_CRC32_CONST_R4 0x00ccaa009eULL
+#define RV_CRC32_CONST_R5 0x0163cd6124ULL
+#define RV_CRC32_MASK32 0x00000000FFFFFFFFULL
+#define RV_CRC32_POLY_TRUE_LE_FULL 0x01DB710641ULL
+#define RV_CRC32_CONST_RU 0x01F7011641ULL
+
+static inline uint64_t rv_clmul(uint64_t a, uint64_t b) {
+  uint64_t r;
+  __asm__ volatile(
+      ".option push\n\t"
+      ".option arch, +zbc\n\t"
+      "clmul %0, %1, %2\n\t"
+      ".option pop\n\t"
+      : "=r"(r)
+      : "r"(a), "r"(b));
+  return r;
+}
+
+static inline uint64_t rv_clmulh(uint64_t a, uint64_t b) {
+  uint64_t r;
+  __asm__ volatile(
+      ".option push\n\t"
+      ".option arch, +zbc\n\t"
+      "clmulh %0, %1, %2\n\t"
+      ".option pop\n\t"
+      : "=r"(r)
+      : "r"(a), "r"(b));
+  return r;
+}
+
+static inline uint32_t rv_crc32_zlib_bitwise(uint32_t crc, const uint8_t *buf,
+                                             size_t len) {
+  uint32_t c = crc;
+  for (size_t i = 0; i < len; ++i) {
+    c ^= buf[i];
+    for (int k = 0; k < 8; ++k) {
+      uint32_t mask = -(int32_t)(c & 1);
+      c = (c >> 1) ^ (0xEDB88320U & mask);  // reflected polynomial
+    }
+  }
+  return c;
+}
+
+static uint32_t rv_crc32_zlib_clmul(uint32_t crc, const uint8_t *buf,
+                                    size_t len) {
+  const uint8_t *p = buf;
+  size_t n = len;
+
+  if (n < 32) {
+    return rv_crc32_zlib_bitwise(crc, p, n);
+  }
+
+  uintptr_t mis = (uintptr_t)p & 0xF;
+  if (unlikely(mis)) {
+    size_t pre = 16 - mis;
+    if (pre > n) pre = n;
+    crc = rv_crc32_zlib_bitwise(crc, p, pre);
+    p += pre;
+    n -= pre;
+  }
+
+  uint64_t x0 = *(const uint64_t *)(const void *)(p + 0);
+  uint64_t x1 = *(const uint64_t *)(const void *)(p + 8);
+  x0 ^= (uint64_t)crc;
+  p += 16;
+  n -= 16;
+
+  const uint64_t C1 = RV_CRC32_CONST_R3;
+  const uint64_t C2 = RV_CRC32_CONST_R4;
+
+  while (likely(n >= 16)) {
+    uint64_t tL = rv_clmul(C2, x1);
+    uint64_t tH = rv_clmulh(C2, x1);
+    uint64_t yL = rv_clmul(C1, x0);
+    uint64_t yH = rv_clmulh(C1, x0);
+    x0 = yL ^ tL;
+    x1 = yH ^ tH;
+
+    uint64_t d0 = *(const uint64_t *)(const void *)(p + 0);
+    uint64_t d1 = *(const uint64_t *)(const void *)(p + 8);
+    x0 ^= d0;
+    x1 ^= d1;
+    p += 16;
+    n -= 16;
+  }
+
+  {
+    uint64_t tH = rv_clmulh(x0, C2);
+    uint64_t tL = rv_clmul(x0, C2);
+    x0 = x1 ^ tL;
+    x1 = tH;
+  }
+
+  uint64_t hi = x1;
+  uint64_t lo = x0;
+  uint64_t t2 = (lo >> 32) | (hi << 32);
+  lo &= RV_CRC32_MASK32;
+
+  lo = rv_clmul(RV_CRC32_CONST_R5, lo) ^ t2;
+  uint64_t tmp = lo;
+  lo &= RV_CRC32_MASK32;
+  lo = rv_clmul(lo, RV_CRC32_CONST_RU);
+  lo &= RV_CRC32_MASK32;
+  lo = rv_clmul(lo, RV_CRC32_POLY_TRUE_LE_FULL) ^ tmp;
+
+  uint32_t c = (uint32_t)(lo >> 32);
+
+  if (n) {
+    c = rv_crc32_zlib_bitwise(c, p, n);
+  }
+  return c;
+}
+
 /**
- * RISC-V CRC32 hardware acceleration (placeholder)
+ * Pipelined version of hardware-accelerated CRC32 calculation using
+ * RISC-V Zbc carry-less multiply instructions.
  *
- * Phase 1: provide a RISC-V-specific compilation unit that currently makes
- * no runtime changes and falls back to the generic software path in
- * bulk_crc32.c. Future work will add Zbc-based acceleration and runtime
- * dispatch.
+ *   crc1, crc2, crc3 : Store initial checksum for each block before
+ *           calling. When it returns, updated checksums are stored.
+ *   p_buf : The base address of the data buffer. The buffer should be
+ *           at least as big as block_size * num_blocks.
+ *   block_size : The size of each block in bytes.
+ *   num_blocks : The number of blocks to work on. Min = 1, Max = 3

Review Comment:
   0 is accepted too, and is just treated as a no-op — mention that, and also that any other value raises an assertion. Which isn't going to be triggered in a release build (where `assert` is compiled out), is it?



##########
hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/util/bulk_crc32_riscv.c:
##########
@@ -16,24 +16,200 @@
  * limitations under the License.
  */
 
+#include <assert.h>
+#include <stddef.h>  // for size_t
+#include <stdio.h>
+#include <string.h>
+
+#include "bulk_crc32.h"
+#include "gcc_optimizations.h"
+
+/**
+ * Hardware-accelerated CRC32 calculation using RISC-V Zbc extension.
+ * Uses carry-less multiply instructions (clmul/clmulh) for CRC32 (zlib
+ * polynomial).
+ */
+
+typedef void (*crc_pipelined_func_t)(uint32_t *, uint32_t *, uint32_t *,
+                                     const uint8_t *, size_t, int);
+extern crc_pipelined_func_t pipelined_crc32_zlib_func;
+
+#if defined(__riscv) && (__riscv_xlen == 64)
+
+#define RV_CRC32_CONST_R3 0x01751997d0ULL
+#define RV_CRC32_CONST_R4 0x00ccaa009eULL
+#define RV_CRC32_CONST_R5 0x0163cd6124ULL
+#define RV_CRC32_MASK32 0x00000000FFFFFFFFULL
+#define RV_CRC32_POLY_TRUE_LE_FULL 0x01DB710641ULL
+#define RV_CRC32_CONST_RU 0x01F7011641ULL
+
+static inline uint64_t rv_clmul(uint64_t a, uint64_t b) {
+  uint64_t r;
+  __asm__ volatile(
+      ".option push\n\t"
+      ".option arch, +zbc\n\t"
+      "clmul %0, %1, %2\n\t"
+      ".option pop\n\t"
+      : "=r"(r)
+      : "r"(a), "r"(b));
+  return r;
+}
+
+static inline uint64_t rv_clmulh(uint64_t a, uint64_t b) {
+  uint64_t r;
+  __asm__ volatile(
+      ".option push\n\t"
+      ".option arch, +zbc\n\t"
+      "clmulh %0, %1, %2\n\t"
+      ".option pop\n\t"
+      : "=r"(r)
+      : "r"(a), "r"(b));
+  return r;
+}
+
+static inline uint32_t rv_crc32_zlib_bitwise(uint32_t crc, const uint8_t *buf,
+                                             size_t len) {
+  uint32_t c = crc;
+  for (size_t i = 0; i < len; ++i) {
+    c ^= buf[i];
+    for (int k = 0; k < 8; ++k) {
+      uint32_t mask = -(int32_t)(c & 1);
+      c = (c >> 1) ^ (0xEDB88320U & mask);  // reflected polynomial
+    }
+  }
+  return c;
+}
+
+static uint32_t rv_crc32_zlib_clmul(uint32_t crc, const uint8_t *buf,
+                                    size_t len) {
+  const uint8_t *p = buf;
+  size_t n = len;
+
+  if (n < 32) {
+    return rv_crc32_zlib_bitwise(crc, p, n);
+  }
+
+  uintptr_t mis = (uintptr_t)p & 0xF;

Review Comment:
   Comment that this preamble handles misaligned data, and that misalignment is considered unlikely.



##########
hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/util/bulk_crc32_riscv.c:
##########
@@ -16,24 +16,200 @@
  * limitations under the License.
  */
 
+#include <assert.h>
+#include <stddef.h>  // for size_t
+#include <stdio.h>
+#include <string.h>
+
+#include "bulk_crc32.h"
+#include "gcc_optimizations.h"
+
+/**
+ * Hardware-accelerated CRC32 calculation using RISC-V Zbc extension.
+ * Uses carry-less multiply instructions (clmul/clmulh) for CRC32 (zlib
+ * polynomial).
+ */
+
+typedef void (*crc_pipelined_func_t)(uint32_t *, uint32_t *, uint32_t *,
+                                     const uint8_t *, size_t, int);
+extern crc_pipelined_func_t pipelined_crc32_zlib_func;
+
+#if defined(__riscv) && (__riscv_xlen == 64)
+
+#define RV_CRC32_CONST_R3 0x01751997d0ULL
+#define RV_CRC32_CONST_R4 0x00ccaa009eULL
+#define RV_CRC32_CONST_R5 0x0163cd6124ULL
+#define RV_CRC32_MASK32 0x00000000FFFFFFFFULL
+#define RV_CRC32_POLY_TRUE_LE_FULL 0x01DB710641ULL
+#define RV_CRC32_CONST_RU 0x01F7011641ULL
+
+static inline uint64_t rv_clmul(uint64_t a, uint64_t b) {
+  uint64_t r;
+  __asm__ volatile(
+      ".option push\n\t"
+      ".option arch, +zbc\n\t"
+      "clmul %0, %1, %2\n\t"
+      ".option pop\n\t"
+      : "=r"(r)
+      : "r"(a), "r"(b));
+  return r;
+}
+
+static inline uint64_t rv_clmulh(uint64_t a, uint64_t b) {
+  uint64_t r;
+  __asm__ volatile(
+      ".option push\n\t"
+      ".option arch, +zbc\n\t"
+      "clmulh %0, %1, %2\n\t"
+      ".option pop\n\t"
+      : "=r"(r)
+      : "r"(a), "r"(b));
+  return r;
+}
+
+static inline uint32_t rv_crc32_zlib_bitwise(uint32_t crc, const uint8_t *buf,
+                                             size_t len) {
+  uint32_t c = crc;
+  for (size_t i = 0; i < len; ++i) {
+    c ^= buf[i];
+    for (int k = 0; k < 8; ++k) {
+      uint32_t mask = -(int32_t)(c & 1);
+      c = (c >> 1) ^ (0xEDB88320U & mask);  // reflected polynomial
+    }
+  }
+  return c;
+}
+
+static uint32_t rv_crc32_zlib_clmul(uint32_t crc, const uint8_t *buf,
+                                    size_t len) {
+  const uint8_t *p = buf;
+  size_t n = len;
+
+  if (n < 32) {
+    return rv_crc32_zlib_bitwise(crc, p, n);
+  }
+
+  uintptr_t mis = (uintptr_t)p & 0xF;
+  if (unlikely(mis)) {
+    size_t pre = 16 - mis;
+    if (pre > n) pre = n;
+    crc = rv_crc32_zlib_bitwise(crc, p, pre);
+    p += pre;
+    n -= pre;
+  }
+
+  uint64_t x0 = *(const uint64_t *)(const void *)(p + 0);
+  uint64_t x1 = *(const uint64_t *)(const void *)(p + 8);
+  x0 ^= (uint64_t)crc;
+  p += 16;
+  n -= 16;
+
+  const uint64_t C1 = RV_CRC32_CONST_R3;
+  const uint64_t C2 = RV_CRC32_CONST_R4;
+
+  while (likely(n >= 16)) {

Review Comment:
   And explain that this loop walks through the aligned data 16 bytes at a time.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to