[ 
https://issues.apache.org/jira/browse/HADOOP-19724?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=18037215#comment-18037215
 ] 

ASF GitHub Bot commented on HADOOP-19724:
-----------------------------------------

steveloughran commented on code in PR #8031:
URL: https://github.com/apache/hadoop/pull/8031#discussion_r2511392823


##########
hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/util/bulk_crc32_riscv.c:
##########
@@ -16,24 +16,200 @@
  * limitations under the License.
  */
 
+#include <assert.h>
+#include <stddef.h>  // for size_t
+#include <stdio.h>
+#include <string.h>
+
+#include "bulk_crc32.h"
+#include "gcc_optimizations.h"
+
+/**
+ * Hardware-accelerated CRC32 calculation using RISC-V Zbc extension.
+ * Uses carry-less multiply instructions (clmul/clmulh) for CRC32 (zlib
+ * polynomial).
+ */
+
+typedef void (*crc_pipelined_func_t)(uint32_t *, uint32_t *, uint32_t *,
+                                     const uint8_t *, size_t, int);
+extern crc_pipelined_func_t pipelined_crc32_zlib_func;
+
+#if defined(__riscv) && (__riscv_xlen == 64)
+
+#define RV_CRC32_CONST_R3 0x01751997d0ULL
+#define RV_CRC32_CONST_R4 0x00ccaa009eULL
+#define RV_CRC32_CONST_R5 0x0163cd6124ULL
+#define RV_CRC32_MASK32 0x00000000FFFFFFFFULL
+#define RV_CRC32_POLY_TRUE_LE_FULL 0x01DB710641ULL
+#define RV_CRC32_CONST_RU 0x01F7011641ULL
+
+static inline uint64_t rv_clmul(uint64_t a, uint64_t b) {

Review Comment:
   explain what it does with a comment. same for the methods below



##########
hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/util/bulk_crc32_riscv.c:
##########
@@ -16,24 +16,200 @@
  * limitations under the License.
  */
 
+#include <assert.h>
+#include <stddef.h>  // for size_t
+#include <stdio.h>
+#include <string.h>
+
+#include "bulk_crc32.h"
+#include "gcc_optimizations.h"
+
+/**
+ * Hardware-accelerated CRC32 calculation using RISC-V Zbc extension.
+ * Uses carry-less multiply instructions (clmul/clmulh) for CRC32 (zlib
+ * polynomial).
+ */
+
+typedef void (*crc_pipelined_func_t)(uint32_t *, uint32_t *, uint32_t *,
+                                     const uint8_t *, size_t, int);
+extern crc_pipelined_func_t pipelined_crc32_zlib_func;
+
+#if defined(__riscv) && (__riscv_xlen == 64)
+
+#define RV_CRC32_CONST_R3 0x01751997d0ULL

Review Comment:
   explain what these are...assumign they're all defined in the crc spec, say so



##########
hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/util/bulk_crc32_riscv.c:
##########
@@ -16,24 +16,200 @@
  * limitations under the License.
  */
 
+#include <assert.h>
+#include <stddef.h>  // for size_t
+#include <stdio.h>
+#include <string.h>
+
+#include "bulk_crc32.h"
+#include "gcc_optimizations.h"
+
+/**
+ * Hardware-accelerated CRC32 calculation using RISC-V Zbc extension.
+ * Uses carry-less multiply instructions (clmul/clmulh) for CRC32 (zlib
+ * polynomial).
+ */
+
+typedef void (*crc_pipelined_func_t)(uint32_t *, uint32_t *, uint32_t *,
+                                     const uint8_t *, size_t, int);
+extern crc_pipelined_func_t pipelined_crc32_zlib_func;
+
+#if defined(__riscv) && (__riscv_xlen == 64)
+
+#define RV_CRC32_CONST_R3 0x01751997d0ULL
+#define RV_CRC32_CONST_R4 0x00ccaa009eULL
+#define RV_CRC32_CONST_R5 0x0163cd6124ULL
+#define RV_CRC32_MASK32 0x00000000FFFFFFFFULL
+#define RV_CRC32_POLY_TRUE_LE_FULL 0x01DB710641ULL
+#define RV_CRC32_CONST_RU 0x01F7011641ULL
+
+static inline uint64_t rv_clmul(uint64_t a, uint64_t b) {
+  uint64_t r;
+  __asm__ volatile(
+      ".option push\n\t"
+      ".option arch, +zbc\n\t"
+      "clmul %0, %1, %2\n\t"
+      ".option pop\n\t"
+      : "=r"(r)
+      : "r"(a), "r"(b));
+  return r;
+}
+
+static inline uint64_t rv_clmulh(uint64_t a, uint64_t b) {
+  uint64_t r;
+  __asm__ volatile(
+      ".option push\n\t"
+      ".option arch, +zbc\n\t"
+      "clmulh %0, %1, %2\n\t"
+      ".option pop\n\t"
+      : "=r"(r)
+      : "r"(a), "r"(b));
+  return r;
+}
+
+static inline uint32_t rv_crc32_zlib_bitwise(uint32_t crc, const uint8_t *buf,
+                                             size_t len) {
+  uint32_t c = crc;
+  for (size_t i = 0; i < len; ++i) {
+    c ^= buf[i];
+    for (int k = 0; k < 8; ++k) {
+      uint32_t mask = -(int32_t)(c & 1);
+      c = (c >> 1) ^ (0xEDB88320U & mask);  // reflected polynomial
+    }
+  }
+  return c;
+}
+
+static uint32_t rv_crc32_zlib_clmul(uint32_t crc, const uint8_t *buf,
+                                    size_t len) {
+  const uint8_t *p = buf;
+  size_t n = len;
+
+  if (n < 32) {
+    return rv_crc32_zlib_bitwise(crc, p, n);
+  }
+
+  uintptr_t mis = (uintptr_t)p & 0xF;
+  if (unlikely(mis)) {
+    size_t pre = 16 - mis;
+    if (pre > n) pre = n;
+    crc = rv_crc32_zlib_bitwise(crc, p, pre);
+    p += pre;
+    n -= pre;
+  }
+
+  uint64_t x0 = *(const uint64_t *)(const void *)(p + 0);
+  uint64_t x1 = *(const uint64_t *)(const void *)(p + 8);
+  x0 ^= (uint64_t)crc;
+  p += 16;
+  n -= 16;
+
+  const uint64_t C1 = RV_CRC32_CONST_R3;
+  const uint64_t C2 = RV_CRC32_CONST_R4;
+
+  while (likely(n >= 16)) {
+    uint64_t tL = rv_clmul(C2, x1);
+    uint64_t tH = rv_clmulh(C2, x1);
+    uint64_t yL = rv_clmul(C1, x0);
+    uint64_t yH = rv_clmulh(C1, x0);
+    x0 = yL ^ tL;
+    x1 = yH ^ tH;
+
+    uint64_t d0 = *(const uint64_t *)(const void *)(p + 0);
+    uint64_t d1 = *(const uint64_t *)(const void *)(p + 8);
+    x0 ^= d0;
+    x1 ^= d1;
+    p += 16;
+    n -= 16;
+  }
+
+  {
+    uint64_t tH = rv_clmulh(x0, C2);
+    uint64_t tL = rv_clmul(x0, C2);
+    x0 = x1 ^ tL;
+    x1 = tH;
+  }
+
+  uint64_t hi = x1;
+  uint64_t lo = x0;
+  uint64_t t2 = (lo >> 32) | (hi << 32);
+  lo &= RV_CRC32_MASK32;
+
+  lo = rv_clmul(RV_CRC32_CONST_R5, lo) ^ t2;
+  uint64_t tmp = lo;
+  lo &= RV_CRC32_MASK32;
+  lo = rv_clmul(lo, RV_CRC32_CONST_RU);
+  lo &= RV_CRC32_MASK32;
+  lo = rv_clmul(lo, RV_CRC32_POLY_TRUE_LE_FULL) ^ tmp;
+
+  uint32_t c = (uint32_t)(lo >> 32);
+
+  if (n) {
+    c = rv_crc32_zlib_bitwise(c, p, n);
+  }
+  return c;
+}
+
 /**
- * RISC-V CRC32 hardware acceleration (placeholder)
+ * Pipelined version of hardware-accelerated CRC32 calculation using
+ * RISC-V Zbc carry-less multiply instructions.
  *
- * Phase 1: provide a RISC-V-specific compilation unit that currently makes
- * no runtime changes and falls back to the generic software path in
- * bulk_crc32.c. Future work will add Zbc-based acceleration and runtime
- * dispatch.
+ *   crc1, crc2, crc3 : Store initial checksum for each block before
+ *           calling. When it returns, updated checksums are stored.
+ *   p_buf : The base address of the data buffer. The buffer should be
+ *           at least as big as block_size * num_blocks.
+ *   block_size : The size of each block in bytes.
+ *   num_blocks : The number of blocks to work on. Min = 1, Max = 3

Review Comment:
   0 is taken too, just treated as a no-op. mention and that any other value 
raises an assertion. which isn't going to be picked up, is it?



##########
hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/util/bulk_crc32_riscv.c:
##########
@@ -16,24 +16,200 @@
  * limitations under the License.
  */
 
+#include <assert.h>
+#include <stddef.h>  // for size_t
+#include <stdio.h>
+#include <string.h>
+
+#include "bulk_crc32.h"
+#include "gcc_optimizations.h"
+
+/**
+ * Hardware-accelerated CRC32 calculation using RISC-V Zbc extension.
+ * Uses carry-less multiply instructions (clmul/clmulh) for CRC32 (zlib
+ * polynomial).
+ */
+
+typedef void (*crc_pipelined_func_t)(uint32_t *, uint32_t *, uint32_t *,
+                                     const uint8_t *, size_t, int);
+extern crc_pipelined_func_t pipelined_crc32_zlib_func;
+
+#if defined(__riscv) && (__riscv_xlen == 64)
+
+#define RV_CRC32_CONST_R3 0x01751997d0ULL
+#define RV_CRC32_CONST_R4 0x00ccaa009eULL
+#define RV_CRC32_CONST_R5 0x0163cd6124ULL
+#define RV_CRC32_MASK32 0x00000000FFFFFFFFULL
+#define RV_CRC32_POLY_TRUE_LE_FULL 0x01DB710641ULL
+#define RV_CRC32_CONST_RU 0x01F7011641ULL
+
+static inline uint64_t rv_clmul(uint64_t a, uint64_t b) {
+  uint64_t r;
+  __asm__ volatile(
+      ".option push\n\t"
+      ".option arch, +zbc\n\t"
+      "clmul %0, %1, %2\n\t"
+      ".option pop\n\t"
+      : "=r"(r)
+      : "r"(a), "r"(b));
+  return r;
+}
+
+static inline uint64_t rv_clmulh(uint64_t a, uint64_t b) {
+  uint64_t r;
+  __asm__ volatile(
+      ".option push\n\t"
+      ".option arch, +zbc\n\t"
+      "clmulh %0, %1, %2\n\t"
+      ".option pop\n\t"
+      : "=r"(r)
+      : "r"(a), "r"(b));
+  return r;
+}
+
+static inline uint32_t rv_crc32_zlib_bitwise(uint32_t crc, const uint8_t *buf,
+                                             size_t len) {
+  uint32_t c = crc;
+  for (size_t i = 0; i < len; ++i) {
+    c ^= buf[i];
+    for (int k = 0; k < 8; ++k) {
+      uint32_t mask = -(int32_t)(c & 1);
+      c = (c >> 1) ^ (0xEDB88320U & mask);  // reflected polynomial
+    }
+  }
+  return c;
+}
+
+static uint32_t rv_crc32_zlib_clmul(uint32_t crc, const uint8_t *buf,
+                                    size_t len) {
+  const uint8_t *p = buf;
+  size_t n = len;
+
+  if (n < 32) {
+    return rv_crc32_zlib_bitwise(crc, p, n);
+  }
+
+  uintptr_t mis = (uintptr_t)p & 0xF;

Review Comment:
   comment that this is to handle misaligned data and that this is considered 
unlikely



##########
hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/util/bulk_crc32_riscv.c:
##########
@@ -16,24 +16,200 @@
  * limitations under the License.
  */
 
+#include <assert.h>
+#include <stddef.h>  // for size_t
+#include <stdio.h>
+#include <string.h>
+
+#include "bulk_crc32.h"
+#include "gcc_optimizations.h"
+
+/**
+ * Hardware-accelerated CRC32 calculation using RISC-V Zbc extension.
+ * Uses carry-less multiply instructions (clmul/clmulh) for CRC32 (zlib
+ * polynomial).
+ */
+
+typedef void (*crc_pipelined_func_t)(uint32_t *, uint32_t *, uint32_t *,
+                                     const uint8_t *, size_t, int);
+extern crc_pipelined_func_t pipelined_crc32_zlib_func;
+
+#if defined(__riscv) && (__riscv_xlen == 64)
+
+#define RV_CRC32_CONST_R3 0x01751997d0ULL
+#define RV_CRC32_CONST_R4 0x00ccaa009eULL
+#define RV_CRC32_CONST_R5 0x0163cd6124ULL
+#define RV_CRC32_MASK32 0x00000000FFFFFFFFULL
+#define RV_CRC32_POLY_TRUE_LE_FULL 0x01DB710641ULL
+#define RV_CRC32_CONST_RU 0x01F7011641ULL
+
+static inline uint64_t rv_clmul(uint64_t a, uint64_t b) {
+  uint64_t r;
+  __asm__ volatile(
+      ".option push\n\t"
+      ".option arch, +zbc\n\t"
+      "clmul %0, %1, %2\n\t"
+      ".option pop\n\t"
+      : "=r"(r)
+      : "r"(a), "r"(b));
+  return r;
+}
+
+static inline uint64_t rv_clmulh(uint64_t a, uint64_t b) {
+  uint64_t r;
+  __asm__ volatile(
+      ".option push\n\t"
+      ".option arch, +zbc\n\t"
+      "clmulh %0, %1, %2\n\t"
+      ".option pop\n\t"
+      : "=r"(r)
+      : "r"(a), "r"(b));
+  return r;
+}
+
+static inline uint32_t rv_crc32_zlib_bitwise(uint32_t crc, const uint8_t *buf,
+                                             size_t len) {
+  uint32_t c = crc;
+  for (size_t i = 0; i < len; ++i) {
+    c ^= buf[i];
+    for (int k = 0; k < 8; ++k) {
+      uint32_t mask = -(int32_t)(c & 1);
+      c = (c >> 1) ^ (0xEDB88320U & mask);  // reflected polynomial
+    }
+  }
+  return c;
+}
+
+static uint32_t rv_crc32_zlib_clmul(uint32_t crc, const uint8_t *buf,
+                                    size_t len) {
+  const uint8_t *p = buf;
+  size_t n = len;
+
+  if (n < 32) {
+    return rv_crc32_zlib_bitwise(crc, p, n);
+  }
+
+  uintptr_t mis = (uintptr_t)p & 0xF;
+  if (unlikely(mis)) {
+    size_t pre = 16 - mis;
+    if (pre > n) pre = n;
+    crc = rv_crc32_zlib_bitwise(crc, p, pre);
+    p += pre;
+    n -= pre;
+  }
+
+  uint64_t x0 = *(const uint64_t *)(const void *)(p + 0);
+  uint64_t x1 = *(const uint64_t *)(const void *)(p + 8);
+  x0 ^= (uint64_t)crc;
+  p += 16;
+  n -= 16;
+
+  const uint64_t C1 = RV_CRC32_CONST_R3;
+  const uint64_t C2 = RV_CRC32_CONST_R4;
+
+  while (likely(n >= 16)) {

Review Comment:
   and explain this is going through 16 bytes of aligned data





> [RISC-V]  Add rv bulk CRC32 (non-CRC32C) optimized path
> -------------------------------------------------------
>
>                 Key: HADOOP-19724
>                 URL: https://issues.apache.org/jira/browse/HADOOP-19724
>             Project: Hadoop Common
>          Issue Type: Sub-task
>          Components: hadoop-common
>    Affects Versions: 3.5.0
>            Reporter: Ptroc
>            Priority: Major
>              Labels: native, pull-request-available, risc-v
>




--
This message was sent by Atlassian Jira
(v8.20.10#820010)

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to