This is an automated email from the ASF dual-hosted git repository.

chaokunyang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/fory.git


The following commit(s) were added to refs/heads/main by this push:
     new 8ee29e1cb fix(c++): fix misaligned address access errors detected by UBSan in buffer.h (#3479)
8ee29e1cb is described below

commit 8ee29e1cbe556f7f5c1cb118d90175c740bb05dd
Author: Peiyang He <[email protected]>
AuthorDate: Fri Mar 20 05:40:07 2026 -0400

    fix(c++): fix misaligned address access errors detected by UBSan in buffer.h (#3479)
    
    ## Why?
    `buffer.h` reads and writes multi-byte integers (16/32/64-bit) directly
    into a raw `uint8_t*` buffer. The original code cast the byte pointer to
    a **typed** pointer and dereferenced it:
    
    - Read:
    `return reinterpret_cast<const T*>(data_ + offset)[0];`
    - Write:
    `*reinterpret_cast<T*>(data_ + offset) = value;`
    
    Dereferencing a pointer that is not aligned to `alignof(T)` is
    undefined behavior (UB) in C++. UBSan correctly flagged these accesses
    as misaligned-address runtime errors, because `data_ + offset` can fall
    on any byte boundary.
    
    
    ## What does this PR do?
    
    
    Two helper templates were added to `buffer.h`:
    
    ```c++
    template <typename T>
    FORY_ALWAYS_INLINE static T load_unaligned(const uint8_t *ptr) {
        T value;
        std::memcpy(&value, ptr, sizeof(T));
        return value;
    }
    
    template <typename T>
    FORY_ALWAYS_INLINE static void store_unaligned(uint8_t *ptr, T value) {
        std::memcpy(ptr, &value, sizeof(T));
    }
    ```
    
    All `reinterpret_cast` calls that may lead to UB in the file were
    replaced with calls to these helpers.
    
    No UB was detected when running `bazel test --cache_test_results=no
    --config=x86_64 --config=ubsan $(bazel query //...)` after applying this
    patch.
    Details can be found in
    [ubsan_report.txt](https://github.com/user-attachments/files/25990662/ubsan_report.txt).
    Only a few `unused-but-set-parameter` warnings showed up in the UBSan run.
    
    ## Related issues
    
    
    
    Fix https://github.com/apache/fory/issues/3459
    
    ## AI Contribution Checklist
    
    
    
    - [ ] Substantial AI assistance was used in this PR: `yes` / `no`
    - [ ] If `yes`, I included a completed [AI Contribution
    Checklist](https://github.com/apache/fory/blob/main/AI_POLICY.md#9-contributor-checklist-for-ai-assisted-prs)
    in this PR description and the required `AI Usage Disclosure`.
    
    
    
    ## Does this PR introduce any user-facing change?
    
    
    
    - [ ] Does this PR introduce any public API change?
    - [ ] Does this PR introduce any binary protocol compatibility change?
    
    ## Benchmark
    
    
    I believe replacing `reinterpret_cast` with `memcpy` will not add
    noticeable runtime overhead, since both GCC and Clang optimize
    fixed-size `memcpy` calls effectively.
    
    ---------
    
    Co-authored-by: Shawn Yang <[email protected]>
---
 .bazelrc                 |  17 ++++++-
 .github/workflows/ci.yml |  63 +++++++++++++++++++++++++
 AGENTS.md                |   1 +
 ci/run_ci.sh             |   9 +++-
 cpp/fory/util/buffer.h   | 116 +++++++++++++++++++++++------------------------
 5 files changed, 145 insertions(+), 61 deletions(-)

diff --git a/.bazelrc b/.bazelrc
index 89a0c3084..0e67e8f6b 100644
--- a/.bazelrc
+++ b/.bazelrc
@@ -61,4 +61,19 @@ build:x86_64 --copt=-mbmi
 build:x86_64 --copt=-mbmi2
 
 # ARM64-specific optimizations (if any needed in the future)
-build:arm64 --copt=-march=armv8-a
\ No newline at end of file
+build:arm64 --copt=-march=armv8-a
+
+# AddressSanitizer
+build:asan --strip=never
+build:asan --copt=-fsanitize=address
+build:asan --copt=-g
+build:asan --copt=-fno-omit-frame-pointer
+build:asan --linkopt=-fsanitize=address
+
+# UndefinedBehaviorSanitizer
+build:ubsan --strip=never
+build:ubsan 
--copt=-fsanitize=alignment,bool,bounds,bounds-strict,builtin,enum,integer-divide-by-zero,object-size,pointer-overflow,return,shift,signed-integer-overflow,unreachable,vla-bound,vptr
+build:ubsan --copt=-g
+build:ubsan --copt=-fno-omit-frame-pointer
+build:ubsan 
--linkopt=-fsanitize=alignment,bool,bounds,bounds-strict,builtin,enum,integer-divide-by-zero,object-size,pointer-overflow,return,shift,signed-integer-overflow,unreachable,vla-bound,vptr
+build:arm64 --copt=-march=armv8-a
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 00b8c3d04..2bce82463 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -604,6 +604,69 @@ jobs:
             bazel-out/*/testlogs/**/*.xml
           if-no-files-found: ignore
 
+  cpp_sanitizers:
+    name: C++ Sanitizer (${{ matrix.sanitizer }})
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        sanitizer: [asan, ubsan]
+    steps:
+      - uses: actions/checkout@v5
+      - name: Set up Python 3.11
+        uses: actions/setup-python@v5
+        with:
+          python-version: 3.11
+          cache: 'pip'
+      - name: Cache Bazel binary
+        uses: actions/cache@v4
+        with:
+          path: |
+            ~/bin/bazel
+            ~/.local/bin/bazel
+          key: bazel-binary-${{ runner.os }}-${{ runner.arch }}-${{ 
hashFiles('.bazelversion') }}
+          restore-keys: |
+            bazel-binary-${{ runner.os }}-${{ runner.arch }}-
+      - name: Cache Bazel repository cache
+        uses: actions/cache@v4
+        with:
+          path: |
+            ~/.cache/bazel/_bazel_*/*/external
+          key: bazel-repo-sanitizer-${{ runner.os }}-${{ runner.arch 
}}-py311-${{ hashFiles('WORKSPACE', '.bazelrc', 'bazel/**') }}
+          restore-keys: |
+            bazel-repo-sanitizer-${{ runner.os }}-${{ runner.arch }}-py311-
+      - name: Cache Bazel build outputs
+        uses: actions/cache@v4
+        with:
+          path: ~/.cache/bazel
+          key: bazel-build-cpp-sanitizer-${{ runner.os }}-${{ runner.arch 
}}-${{ matrix.sanitizer }}-${{ hashFiles('cpp/**', 'BUILD', 'WORKSPACE', 
'.bazelrc') }}
+      - name: Install Bazel and C++ deps
+        run: python ./ci/run_ci.py cpp --install-deps-only
+      - name: Run C++ ${{ matrix.sanitizer }} tests
+        run: |
+          ARCH="$(uname -m)"
+          BAZEL_CONFIGS="--config=${{ matrix.sanitizer }}"
+          if [[ -x ~/bin/bazel ]]; then
+            BAZEL_BIN=~/bin/bazel
+          elif [[ -x ~/.local/bin/bazel ]]; then
+            BAZEL_BIN=~/.local/bin/bazel
+          else
+            echo "bazel not found in ~/bin or ~/.local/bin"
+            exit 1
+          fi
+          if [[ "${ARCH}" == "x86_64" || "${ARCH}" == "amd64" ]]; then
+            BAZEL_CONFIGS="--config=x86_64 ${BAZEL_CONFIGS}"
+          fi
+          ${BAZEL_BIN} test --cache_test_results=no ${BAZEL_CONFIGS} 
$(${BAZEL_BIN} query //cpp/...)
+      - name: Upload Bazel Test Logs (${{ matrix.sanitizer }})
+        uses: actions/upload-artifact@v4
+        if: ${{ !cancelled() }}
+        with:
+          name: bazel-test-logs-${{ matrix.sanitizer }}
+          path: |
+            bazel-out/*/testlogs/**/*.log
+            bazel-out/*/testlogs/**/*.xml
+          if-no-files-found: ignore
+
   cpp_xlang:
     name: C++ Xlang Test
     runs-on: ubuntu-latest
diff --git a/AGENTS.md b/AGENTS.md
index f205ad2d4..53f92381b 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -107,6 +107,7 @@ FORY_CSHARP_JAVA_CI=1 ENABLE_FORY_DEBUG_OUTPUT=1 mvn -T16 
test -Dtest=org.apache
 - All commands must be executed within the `cpp` directory.
 - Fory c++ use c++ 17, you must not use features from higher version of C++.
 - Bazel uses bzlmod (`MODULE.bazel`); prefer Bazel 8+.
+- For Bazel C++ tests, detect machine architecture and **only** add 
`--config=x86_64` on `x86_64`/`amd64`; on `arm64`/`aarch64`, do not enable this 
config.
 - When you updated the code, use `clang-format` to update the code
 - When invoking a method that returns `Result`, always use `FORY_TRY` unless 
in a control flow context.
 - Wrap error checks with `FORY_PREDICT_FALSE` for branch prediction 
optimization.
diff --git a/ci/run_ci.sh b/ci/run_ci.sh
index d64cfab3e..d2585231b 100755
--- a/ci/run_ci.sh
+++ b/ci/run_ci.sh
@@ -254,9 +254,16 @@ case $1 in
       "$ROOT"/ci/deploy.sh install_pyarrow
       export PATH=~/bin:$PATH
       echo "bazel version: $(bazel version)"
+      ARCH="$(uname -m)"
+      BAZEL_TEST_CONFIG=""
+      case "${ARCH}" in
+        x86_64|amd64)
+          BAZEL_TEST_CONFIG="--config=x86_64"
+          ;;
+      esac
       set +e
       echo "Executing fory c++ tests"
-      bazel test $(bazel query //...)
+      bazel test ${BAZEL_TEST_CONFIG} $(bazel query //...)
       testcode=$?
       if [[ $testcode -ne 0 ]]; then
         echo "Executing fory c++ tests failed"
diff --git a/cpp/fory/util/buffer.h b/cpp/fory/util/buffer.h
index a9dc0e1d2..b6d1be060 100644
--- a/cpp/fory/util/buffer.h
+++ b/cpp/fory/util/buffer.h
@@ -219,11 +219,11 @@ public:
   // Unsafe methods don't check bound
   template <typename T>
   FORY_ALWAYS_INLINE void unsafe_put(uint32_t offset, T value) {
-    reinterpret_cast<T *>(data_ + offset)[0] = value;
+    store_unaligned(data_ + offset, value);
   }
 
   template <typename T> FORY_ALWAYS_INLINE T unsafe_get(uint32_t offset) {
-    return reinterpret_cast<const T *>(data_ + offset)[0];
+    return load_unaligned<T>(data_ + offset);
   }
 
   template <typename T, typename = std::enable_if_t<std::disjunction_v<
@@ -255,7 +255,7 @@ public:
     FORY_CHECK(relative_offset + sizeof(T) <= size_)
         << "Out of range " << relative_offset << " should be less than "
         << size_;
-    T value = reinterpret_cast<const T *>(data_ + relative_offset)[0];
+    T value = load_unaligned<T>(data_ + relative_offset);
     return value;
   }
 
@@ -348,24 +348,21 @@ public:
     uint64_t encoded = (value & 0x7F) | 0x80;
     encoded |= (static_cast<uint64_t>(value & 0x3F80) << 1);
     if (value < 0x4000) {
-      *reinterpret_cast<uint16_t *>(data_ + offset) =
-          static_cast<uint16_t>(encoded);
+      store_unaligned<uint16_t>(data_ + offset, 
static_cast<uint16_t>(encoded));
       return 2;
     }
     encoded |= (static_cast<uint64_t>(value & 0x1FC000) << 2) | 0x8000;
     if (value < 0x200000) {
-      *reinterpret_cast<uint32_t *>(data_ + offset) =
-          static_cast<uint32_t>(encoded);
+      store_unaligned<uint32_t>(data_ + offset, 
static_cast<uint32_t>(encoded));
       return 3;
     }
     encoded |= (static_cast<uint64_t>(value & 0xFE00000) << 3) | 0x800000;
     if (value < 0x10000000) {
-      *reinterpret_cast<uint32_t *>(data_ + offset) =
-          static_cast<uint32_t>(encoded);
+      store_unaligned<uint32_t>(data_ + offset, 
static_cast<uint32_t>(encoded));
       return 4;
     }
     encoded |= (static_cast<uint64_t>(value >> 28) << 32) | 0x80000000;
-    *reinterpret_cast<uint64_t *>(data_ + offset) = encoded;
+    store_unaligned<uint64_t>(data_ + offset, encoded);
     return 5;
   }
 
@@ -381,7 +378,7 @@ public:
     // Fast path: need at least 5 bytes for safe bulk read (4 bytes + potential
     // 5th)
     if (FORY_PREDICT_TRUE(size_ - offset >= 5)) {
-      uint32_t bulk = *reinterpret_cast<uint32_t *>(data_ + offset);
+      uint32_t bulk = load_unaligned<uint32_t>(data_ + offset);
 
       uint32_t result = bulk & 0x7F;
       if ((bulk & 0x80) == 0) {
@@ -473,45 +470,42 @@ public:
     uint64_t encoded = (value & 0x7F) | 0x80;
     encoded |= ((value & 0x3F80) << 1);
     if (value < 0x4000) {
-      *reinterpret_cast<uint16_t *>(data_ + offset) =
-          static_cast<uint16_t>(encoded);
+      store_unaligned<uint16_t>(data_ + offset, 
static_cast<uint16_t>(encoded));
       return 2;
     }
     encoded |= ((value & 0x1FC000) << 2) | 0x8000;
     if (value < 0x200000) {
-      *reinterpret_cast<uint32_t *>(data_ + offset) =
-          static_cast<uint32_t>(encoded);
+      store_unaligned<uint32_t>(data_ + offset, 
static_cast<uint32_t>(encoded));
       return 3;
     }
     encoded |= ((value & 0xFE00000) << 3) | 0x800000;
     if (value < 0x10000000) {
-      *reinterpret_cast<uint32_t *>(data_ + offset) =
-          static_cast<uint32_t>(encoded);
+      store_unaligned<uint32_t>(data_ + offset, 
static_cast<uint32_t>(encoded));
       return 4;
     }
     encoded |= ((value & 0x7F0000000ULL) << 4) | 0x80000000;
     if (value < 0x800000000ULL) {
-      *reinterpret_cast<uint64_t *>(data_ + offset) = encoded;
+      store_unaligned<uint64_t>(data_ + offset, encoded);
       return 5;
     }
     encoded |= ((value & 0x3F800000000ULL) << 5) | 0x8000000000ULL;
     if (value < 0x40000000000ULL) {
-      *reinterpret_cast<uint64_t *>(data_ + offset) = encoded;
+      store_unaligned<uint64_t>(data_ + offset, encoded);
       return 6;
     }
     encoded |= ((value & 0x1FC0000000000ULL) << 6) | 0x800000000000ULL;
     if (value < 0x2000000000000ULL) {
-      *reinterpret_cast<uint64_t *>(data_ + offset) = encoded;
+      store_unaligned<uint64_t>(data_ + offset, encoded);
       return 7;
     }
     encoded |= ((value & 0xFE000000000000ULL) << 7) | 0x80000000000000ULL;
     if (value < 0x100000000000000ULL) {
-      *reinterpret_cast<uint64_t *>(data_ + offset) = encoded;
+      store_unaligned<uint64_t>(data_ + offset, encoded);
       return 8;
     }
     // 9 bytes: write 8 bytes + 1 byte for bits 56-63
     encoded |= 0x8000000000000000ULL;
-    *reinterpret_cast<uint64_t *>(data_ + offset) = encoded;
+    store_unaligned<uint64_t>(data_ + offset, encoded);
     data_[offset + 8] = static_cast<uint8_t>(value >> 56);
     return 9;
   }
@@ -528,7 +522,7 @@ public:
     }
     // Fast path: need at least 9 bytes for safe bulk read
     if (FORY_PREDICT_TRUE(size_ - offset >= 9)) {
-      uint64_t bulk = *reinterpret_cast<uint64_t *>(data_ + offset);
+      uint64_t bulk = load_unaligned<uint64_t>(data_ + offset);
 
       uint64_t result = bulk & 0x7F;
       if ((bulk & 0x80) == 0) {
@@ -617,13 +611,13 @@ public:
   /// - If bit 0 is 1: read 1 byte flag + 8 bytes uint64
   FORY_ALWAYS_INLINE uint64_t get_tagged_uint64(uint32_t offset,
                                                 uint32_t *read_bytes_length) {
-    uint32_t i = *reinterpret_cast<const uint32_t *>(data_ + offset);
+    uint32_t i = load_unaligned<uint32_t>(data_ + offset);
     if ((i & 0b1) != 0b1) {
       *read_bytes_length = 4;
       return static_cast<uint64_t>(i >> 1);
     } else {
       *read_bytes_length = 9;
-      return *reinterpret_cast<const uint64_t *>(data_ + offset + 1);
+      return load_unaligned<uint64_t>(data_ + offset + 1);
     }
   }
 
@@ -633,13 +627,13 @@ public:
   /// - If bit 0 is 1: read 1 byte flag + 8 bytes int64
   FORY_ALWAYS_INLINE int64_t get_tagged_int64(uint32_t offset,
                                               uint32_t *read_bytes_length) {
-    int32_t i = *reinterpret_cast<const int32_t *>(data_ + offset);
+    int32_t i = load_unaligned<int32_t>(data_ + offset);
     if ((i & 0b1) != 0b1) {
       *read_bytes_length = 4;
       return static_cast<int64_t>(i >> 1); // Arithmetic shift for signed
     } else {
       *read_bytes_length = 9;
-      return *reinterpret_cast<const int64_t *>(data_ + offset + 1);
+      return load_unaligned<int64_t>(data_ + offset + 1);
     }
   }
 
@@ -651,12 +645,12 @@ public:
                                                 uint64_t value) {
     constexpr uint64_t MAX_SMALL_VALUE = 0x7fffffff; // INT32_MAX as u64
     if (value <= MAX_SMALL_VALUE) {
-      *reinterpret_cast<int32_t *>(data_ + offset) = 
static_cast<int32_t>(value)
-                                                     << 1;
+      store_unaligned<int32_t>(data_ + offset, static_cast<int32_t>(value)
+                                                   << 1);
       return 4;
     } else {
       data_[offset] = 0b1;
-      *reinterpret_cast<uint64_t *>(data_ + offset + 1) = value;
+      store_unaligned<uint64_t>(data_ + offset + 1, value);
       return 9;
     }
   }
@@ -670,12 +664,12 @@ public:
     constexpr int64_t MIN_SMALL_VALUE = -1073741824; // -2^30
     constexpr int64_t MAX_SMALL_VALUE = 1073741823;  // 2^30 - 1
     if (value >= MIN_SMALL_VALUE && value <= MAX_SMALL_VALUE) {
-      *reinterpret_cast<int32_t *>(data_ + offset) = 
static_cast<int32_t>(value)
-                                                     << 1;
+      store_unaligned<int32_t>(data_ + offset, static_cast<int32_t>(value)
+                                                   << 1);
       return 4;
     } else {
       data_[offset] = 0b1;
-      *reinterpret_cast<int64_t *>(data_ + offset + 1) = value;
+      store_unaligned<int64_t>(data_ + offset + 1, value);
       return 9;
     }
   }
@@ -808,28 +802,25 @@ public:
     uint64_t encoded = (value & 0x7F) | 0x80;
     encoded |= ((value & 0x3F80) << 1);
     if (value < 0x4000) {
-      *reinterpret_cast<uint16_t *>(data_ + offset) =
-          static_cast<uint16_t>(encoded);
+      store_unaligned<uint16_t>(data_ + offset, 
static_cast<uint16_t>(encoded));
       increase_writer_index(2);
       return;
     }
     encoded |= ((value & 0x1FC000) << 2) | 0x8000;
     if (value < 0x200000) {
-      *reinterpret_cast<uint32_t *>(data_ + offset) =
-          static_cast<uint32_t>(encoded);
+      store_unaligned<uint32_t>(data_ + offset, 
static_cast<uint32_t>(encoded));
       increase_writer_index(3);
       return;
     }
     encoded |= ((value & 0xFE00000) << 3) | 0x800000;
     if (value < 0x10000000) {
-      *reinterpret_cast<uint32_t *>(data_ + offset) =
-          static_cast<uint32_t>(encoded);
+      store_unaligned<uint32_t>(data_ + offset, 
static_cast<uint32_t>(encoded));
       increase_writer_index(4);
       return;
     }
     // 5 bytes: bits 28-35 (up to 36 bits total)
     encoded |= ((value & 0xFF0000000ULL) << 4) | 0x80000000;
-    *reinterpret_cast<uint64_t *>(data_ + offset) = encoded;
+    store_unaligned<uint64_t>(data_ + offset, encoded);
     increase_writer_index(5);
   }
 
@@ -875,8 +866,7 @@ public:
     if (FORY_PREDICT_FALSE(!ensure_readable(2, error))) {
       return 0;
     }
-    uint16_t value =
-        reinterpret_cast<const uint16_t *>(data_ + reader_index_)[0];
+    uint16_t value = load_unaligned<uint16_t>(data_ + reader_index_);
     reader_index_ += 2;
     return value;
   }
@@ -886,7 +876,7 @@ public:
     if (FORY_PREDICT_FALSE(!ensure_readable(2, error))) {
       return 0;
     }
-    int16_t value = reinterpret_cast<const int16_t *>(data_ + 
reader_index_)[0];
+    int16_t value = load_unaligned<int16_t>(data_ + reader_index_);
     reader_index_ += 2;
     return value;
   }
@@ -909,8 +899,7 @@ public:
     if (FORY_PREDICT_FALSE(!ensure_readable(4, error))) {
       return 0;
     }
-    uint32_t value =
-        reinterpret_cast<const uint32_t *>(data_ + reader_index_)[0];
+    uint32_t value = load_unaligned<uint32_t>(data_ + reader_index_);
     reader_index_ += 4;
     return value;
   }
@@ -921,7 +910,7 @@ public:
     if (FORY_PREDICT_FALSE(!ensure_readable(4, error))) {
       return 0;
     }
-    int32_t value = reinterpret_cast<const int32_t *>(data_ + 
reader_index_)[0];
+    int32_t value = load_unaligned<int32_t>(data_ + reader_index_);
     reader_index_ += 4;
     return value;
   }
@@ -932,8 +921,7 @@ public:
     if (FORY_PREDICT_FALSE(!ensure_readable(8, error))) {
       return 0;
     }
-    uint64_t value =
-        reinterpret_cast<const uint64_t *>(data_ + reader_index_)[0];
+    uint64_t value = load_unaligned<uint64_t>(data_ + reader_index_);
     reader_index_ += 8;
     return value;
   }
@@ -944,7 +932,7 @@ public:
     if (FORY_PREDICT_FALSE(!ensure_readable(8, error))) {
       return 0;
     }
-    int64_t value = reinterpret_cast<const int64_t *>(data_ + 
reader_index_)[0];
+    int64_t value = load_unaligned<int64_t>(data_ + reader_index_);
     reader_index_ += 8;
     return value;
   }
@@ -954,7 +942,7 @@ public:
     if (FORY_PREDICT_FALSE(!ensure_readable(4, error))) {
       return 0.0f;
     }
-    float value = reinterpret_cast<const float *>(data_ + reader_index_)[0];
+    float value = load_unaligned<float>(data_ + reader_index_);
     reader_index_ += 4;
     return value;
   }
@@ -964,7 +952,7 @@ public:
     if (FORY_PREDICT_FALSE(!ensure_readable(8, error))) {
       return 0.0;
     }
-    double value = reinterpret_cast<const double *>(data_ + reader_index_)[0];
+    double value = load_unaligned<double>(data_ + reader_index_);
     reader_index_ += 8;
     return value;
   }
@@ -978,7 +966,7 @@ public:
       return read_var_uint32_slow(error);
     }
     uint32_t offset = reader_index_;
-    uint32_t bulk = *reinterpret_cast<uint32_t *>(data_ + offset);
+    uint32_t bulk = load_unaligned<uint32_t>(data_ + offset);
 
     uint32_t result = bulk & 0x7F;
     if ((bulk & 0x80) == 0) {
@@ -1063,7 +1051,7 @@ public:
     if (FORY_PREDICT_FALSE(!ensure_readable(4, error))) {
       return 0;
     }
-    int32_t i = reinterpret_cast<const int32_t *>(data_ + reader_index_)[0];
+    int32_t i = load_unaligned<int32_t>(data_ + reader_index_);
     if ((i & 0b1) != 0b1) {
       reader_index_ += 4;
       return static_cast<int64_t>(i >> 1); // arithmetic right shift
@@ -1071,8 +1059,7 @@ public:
       if (FORY_PREDICT_FALSE(!ensure_readable(9, error))) {
         return 0;
       }
-      int64_t value =
-          reinterpret_cast<const int64_t *>(data_ + reader_index_ + 1)[0];
+      int64_t value = load_unaligned<int64_t>(data_ + reader_index_ + 1);
       reader_index_ += 9;
       return value;
     }
@@ -1100,7 +1087,7 @@ public:
     if (FORY_PREDICT_FALSE(!ensure_readable(4, error))) {
       return 0;
     }
-    uint32_t i = reinterpret_cast<const uint32_t *>(data_ + reader_index_)[0];
+    uint32_t i = load_unaligned<uint32_t>(data_ + reader_index_);
     if ((i & 0b1) != 0b1) {
       reader_index_ += 4;
       return static_cast<uint64_t>(i >> 1);
@@ -1108,8 +1095,7 @@ public:
       if (FORY_PREDICT_FALSE(!ensure_readable(9, error))) {
         return 0;
       }
-      uint64_t value =
-          reinterpret_cast<const uint64_t *>(data_ + reader_index_ + 1)[0];
+      uint64_t value = load_unaligned<uint64_t>(data_ + reader_index_ + 1);
       reader_index_ += 9;
       return value;
     }
@@ -1125,7 +1111,7 @@ public:
       return read_var_uint36_small_slow(error);
     }
     // Fast path: need at least 8 bytes for safe bulk read.
-    uint64_t bulk = *reinterpret_cast<uint64_t *>(data_ + offset);
+    uint64_t bulk = load_unaligned<uint64_t>(data_ + offset);
     uint64_t result = bulk & 0x7F;
     if ((bulk & 0x80) == 0) {
       reader_index_ = offset + 1;
@@ -1263,6 +1249,18 @@ private:
   friend class PyInputStream;
   friend class OutputStream;
 
+  template <typename T>
+  FORY_ALWAYS_INLINE static T load_unaligned(const uint8_t *ptr) {
+    T value;
+    std::memcpy(&value, ptr, sizeof(T));
+    return value;
+  }
+
+  template <typename T>
+  FORY_ALWAYS_INLINE static void store_unaligned(uint8_t *ptr, T value) {
+    std::memcpy(ptr, &value, sizeof(T));
+  }
+
   FORY_ALWAYS_INLINE void rebind_input_stream_to_this() {
     if (input_stream_ == nullptr) {
       return;


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to