This is an automated email from the ASF dual-hosted git repository.

chaokunyang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/fory.git


The following commit(s) were added to refs/heads/main by this push:
     new 87acc125f feat(c++): add configurable deserialization size guardrails 
(#3455)
87acc125f is described below

commit 87acc125f36890b4aee1795f34b50c45d3fa9858
Author: devadhe sb <[email protected]>
AuthorDate: Tue Mar 10 08:46:42 2026 +0530

    feat(c++): add configurable deserialization size guardrails (#3455)
    
    ## Why?
    
    We currently don't have any size limits for incoming payloads in the C++
    implementation. This is a security risk because a malicious or malformed
    payload can claim to have a massive collection or binary length, forcing
    the system to pre-allocate gigabytes of memory (via `.reserve()` or
    constructors) before actually reading the data. This makes the system
    vulnerable to simple Out-of-Memory (OOM) Denial-of-Service attacks.
    
    ## What does this PR do?
    
    This PR adds two essential security guardrails to the deserialization
    path: `max_binary_size` and `max_collection_size`.
    
    **Changes included:**
    
    * **Config & API**: Added the two new limits to `serialization::Config`
    and updated `ForyBuilder` so users can easily set these at runtime.
    Defaults are 64MB for binary and 1M entries for collections.
    * **Security Enforcement**:
      * Integrated checks into all sensitive pre-allocation paths, including
        `std::vector`, `std::list`, `std::deque`, `std::forward_list`,
        `std::set`, and `std::unordered_set`.
      * Added entry-count validation for maps (both fast and slow paths).
      * Arithmetic vectors are serialized with a byte length, so that length is
        validated against `max_binary_size` before the element count is derived
        from it and the vector is allocated.
    
    
    * **Context Access**: Exposed a public `config()` accessor in
    `ReadContext` and `WriteContext` so internal serializers can reach these
    settings.
    * **Tests**: Added new test cases in `collection_serializer_test.cc`,
    `map_serializer_test.cc`, and `unsigned_serializer_test.cc` to verify
    that deserialization fails immediately with a descriptive error when
    limits are exceeded.
    
    ## Related issues
    
    Fixes #3408
    
    ## Does this PR introduce any user-facing change?
    
    Yes, it adds two new methods (`max_binary_size` and
    `max_collection_size`) to the `ForyBuilder`.
    
    * [x] Does this PR introduce any public API change?
    * [ ] Does this PR introduce any binary protocol compatibility change?
    
    ## Benchmark
    
    The performance impact is negligible. The checks are simple integer
    comparisons performed once per collection/binary read, occurring right
    before the expensive allocation phase. All 30 existing C++ test targets
    pass with no measurable change in execution time.
---
 cpp/fory/serialization/collection_serializer.h     | 105 ++++++++++++++++++++-
 .../serialization/collection_serializer_test.cc    |  36 +++++++
 cpp/fory/serialization/config.h                    |   6 ++
 cpp/fory/serialization/context.h                   |   3 +
 cpp/fory/serialization/fory.h                      |  13 +++
 cpp/fory/serialization/map_serializer.h            |  14 +++
 cpp/fory/serialization/map_serializer_test.cc      |  16 ++++
 cpp/fory/serialization/unsigned_serializer.h       |  24 +++++
 cpp/fory/serialization/unsigned_serializer_test.cc |  18 ++++
 9 files changed, 234 insertions(+), 1 deletion(-)

diff --git a/cpp/fory/serialization/collection_serializer.h 
b/cpp/fory/serialization/collection_serializer.h
index 3768275d1..419352685 100644
--- a/cpp/fory/serialization/collection_serializer.h
+++ b/cpp/fory/serialization/collection_serializer.h
@@ -392,6 +392,13 @@ inline void collection_insert(Container &result, T &&elem) 
{
 /// Read collection data for polymorphic or shared-ref elements.
 template <typename T, typename Container>
 inline Container read_collection_data_slow(ReadContext &ctx, uint32_t length) {
+  // Guardrail: Enforce max_collection_size for collection reads
+  if (FORY_PREDICT_FALSE(length > ctx.config().max_collection_size)) {
+    ctx.set_error(
+        Error::invalid_data("Collection length exceeds max_collection_size"));
+    return Container{};
+  }
+
   Container result;
   if constexpr (has_reserve_v<Container>) {
     result.reserve(length);
@@ -611,15 +618,22 @@ struct Serializer<
     if (FORY_PREDICT_FALSE(ctx.has_error())) {
       return std::vector<T, Alloc>();
     }
+    // Guardrail: Enforce max_binary_size for binary byte-length reads
+    if (FORY_PREDICT_FALSE(total_bytes_u32 > ctx.config().max_binary_size)) {
+      ctx.set_error(Error::invalid_data("Binary size exceeds 
max_binary_size"));
+      return std::vector<T, Alloc>();
+    }
     if (sizeof(T) == 0) {
       return std::vector<T, Alloc>();
     }
+
+    size_t elem_count = total_bytes_u32 / sizeof(T);
+
     if (total_bytes_u32 % sizeof(T) != 0) {
       ctx.set_error(Error::invalid_data(
           "Vector byte size not aligned with element size"));
       return std::vector<T, Alloc>();
     }
-    size_t elem_count = total_bytes_u32 / sizeof(T);
     std::vector<T, Alloc> result(elem_count);
     if (total_bytes_u32 > 0) {
       ctx.read_bytes(result.data(), static_cast<uint32_t>(total_bytes_u32),
@@ -677,6 +691,13 @@ struct Serializer<
     if (FORY_PREDICT_FALSE(ctx.has_error())) {
       return std::vector<T, Alloc>();
     }
+
+    if (FORY_PREDICT_FALSE(length > ctx.config().max_collection_size)) {
+      ctx.set_error(
+          Error::invalid_data("Collection length exceeds 
max_collection_size"));
+      return std::vector<T, Alloc>();
+    }
+
     // Per xlang spec: header and type_info are omitted when length is 0
     if (length == 0) {
       return std::vector<T, Alloc>();
@@ -808,6 +829,13 @@ struct Serializer<
     if (FORY_PREDICT_FALSE(ctx.has_error())) {
       return std::vector<T, Alloc>();
     }
+
+    if (FORY_PREDICT_FALSE(size > ctx.config().max_collection_size)) {
+      ctx.set_error(
+          Error::invalid_data("Collection length exceeds 
max_collection_size"));
+      return std::vector<T, Alloc>();
+    }
+
     std::vector<T, Alloc> result;
     result.reserve(size);
     for (uint32_t i = 0; i < size; ++i) {
@@ -897,6 +925,12 @@ template <typename Alloc> struct 
Serializer<std::vector<bool, Alloc>> {
     if (FORY_PREDICT_FALSE(ctx.has_error())) {
       return std::vector<bool, Alloc>();
     }
+
+    if (FORY_PREDICT_FALSE(size > ctx.config().max_binary_size)) {
+      ctx.set_error(Error::invalid_data("Binary size exceeds 
max_binary_size"));
+      return std::vector<bool, Alloc>();
+    }
+
     std::vector<bool, Alloc> result(size);
     // Fast path: bulk read all bytes at once if we have enough buffer
     Buffer &buffer = ctx.buffer();
@@ -971,6 +1005,13 @@ template <typename T, typename Alloc> struct 
Serializer<std::list<T, Alloc>> {
     if (FORY_PREDICT_FALSE(ctx.has_error())) {
       return std::list<T, Alloc>();
     }
+
+    if (FORY_PREDICT_FALSE(length > ctx.config().max_collection_size)) {
+      ctx.set_error(
+          Error::invalid_data("Collection length exceeds 
max_collection_size"));
+      return std::list<T, Alloc>();
+    }
+
     // Per xlang spec: header and type_info are omitted when length is 0
     if (length == 0) {
       return std::list<T, Alloc>();
@@ -1101,6 +1142,13 @@ template <typename T, typename Alloc> struct 
Serializer<std::list<T, Alloc>> {
     if (FORY_PREDICT_FALSE(ctx.has_error())) {
       return std::list<T, Alloc>();
     }
+
+    if (FORY_PREDICT_FALSE(size > ctx.config().max_collection_size)) {
+      ctx.set_error(
+          Error::invalid_data("Collection length exceeds 
max_collection_size"));
+      return std::list<T, Alloc>();
+    }
+
     std::list<T, Alloc> result;
     for (uint32_t i = 0; i < size; ++i) {
       if (FORY_PREDICT_FALSE(ctx.has_error())) {
@@ -1161,6 +1209,13 @@ template <typename T, typename Alloc> struct 
Serializer<std::deque<T, Alloc>> {
     if (FORY_PREDICT_FALSE(ctx.has_error())) {
       return std::deque<T, Alloc>();
     }
+
+    if (FORY_PREDICT_FALSE(length > ctx.config().max_collection_size)) {
+      ctx.set_error(
+          Error::invalid_data("Collection length exceeds 
max_collection_size"));
+      return std::deque<T, Alloc>();
+    }
+
     // Per xlang spec: header and type_info are omitted when length is 0
     if (length == 0) {
       return std::deque<T, Alloc>();
@@ -1291,6 +1346,13 @@ template <typename T, typename Alloc> struct 
Serializer<std::deque<T, Alloc>> {
     if (FORY_PREDICT_FALSE(ctx.has_error())) {
       return std::deque<T, Alloc>();
     }
+
+    if (FORY_PREDICT_FALSE(size > ctx.config().max_collection_size)) {
+      ctx.set_error(
+          Error::invalid_data("Collection length exceeds 
max_collection_size"));
+      return std::deque<T, Alloc>();
+    }
+
     std::deque<T, Alloc> result;
     for (uint32_t i = 0; i < size; ++i) {
       if (FORY_PREDICT_FALSE(ctx.has_error())) {
@@ -1352,6 +1414,13 @@ struct Serializer<std::forward_list<T, Alloc>> {
     if (FORY_PREDICT_FALSE(ctx.has_error())) {
       return std::forward_list<T, Alloc>();
     }
+
+    if (FORY_PREDICT_FALSE(length > ctx.config().max_collection_size)) {
+      ctx.set_error(
+          Error::invalid_data("Collection length exceeds 
max_collection_size"));
+      return std::forward_list<T, Alloc>();
+    }
+
     // Per xlang spec: header and type_info are omitted when length is 0
     if (length == 0) {
       return std::forward_list<T, Alloc>();
@@ -1716,6 +1785,13 @@ struct Serializer<std::forward_list<T, Alloc>> {
     if (FORY_PREDICT_FALSE(ctx.has_error())) {
       return std::forward_list<T, Alloc>();
     }
+
+    if (FORY_PREDICT_FALSE(size > ctx.config().max_collection_size)) {
+      ctx.set_error(
+          Error::invalid_data("Collection length exceeds 
max_collection_size"));
+      return std::forward_list<T, Alloc>();
+    }
+
     std::vector<T> temp;
     temp.reserve(size);
     for (uint32_t i = 0; i < size; ++i) {
@@ -1814,6 +1890,13 @@ struct Serializer<std::set<T, Args...>> {
     if (FORY_PREDICT_FALSE(ctx.has_error())) {
       return std::set<T, Args...>();
     }
+
+    if (FORY_PREDICT_FALSE(size > ctx.config().max_collection_size)) {
+      ctx.set_error(
+          Error::invalid_data("Collection length exceeds 
max_collection_size"));
+      return std::set<T, Args...>();
+    }
+
     // Per xlang spec: header and type_info are omitted when length is 0
     if (size == 0) {
       return std::set<T, Args...>();
@@ -1894,6 +1977,13 @@ struct Serializer<std::set<T, Args...>> {
     if (FORY_PREDICT_FALSE(ctx.has_error())) {
       return std::set<T, Args...>();
     }
+
+    if (FORY_PREDICT_FALSE(size > ctx.config().max_collection_size)) {
+      ctx.set_error(
+          Error::invalid_data("Collection length exceeds 
max_collection_size"));
+      return std::set<T, Args...>();
+    }
+
     std::set<T, Args...> result;
     for (uint32_t i = 0; i < size; ++i) {
       if (FORY_PREDICT_FALSE(ctx.has_error())) {
@@ -1988,6 +2078,12 @@ struct Serializer<std::unordered_set<T, Args...>> {
       return std::unordered_set<T, Args...>();
     }
 
+    if (FORY_PREDICT_FALSE(size > ctx.config().max_collection_size)) {
+      ctx.set_error(
+          Error::invalid_data("Collection length exceeds 
max_collection_size"));
+      return std::unordered_set<T, Args...>();
+    }
+
     // Per xlang spec: header and type_info are omitted when length is 0
     if (size == 0) {
       return std::unordered_set<T, Args...>();
@@ -2070,6 +2166,13 @@ struct Serializer<std::unordered_set<T, Args...>> {
     if (FORY_PREDICT_FALSE(ctx.has_error())) {
       return std::unordered_set<T, Args...>();
     }
+
+    if (FORY_PREDICT_FALSE(size > ctx.config().max_collection_size)) {
+      ctx.set_error(
+          Error::invalid_data("Collection length exceeds 
max_collection_size"));
+      return std::unordered_set<T, Args...>();
+    }
+
     std::unordered_set<T, Args...> result;
     result.reserve(size);
     for (uint32_t i = 0; i < size; ++i) {
diff --git a/cpp/fory/serialization/collection_serializer_test.cc 
b/cpp/fory/serialization/collection_serializer_test.cc
index 0394ff256..e50afa290 100644
--- a/cpp/fory/serialization/collection_serializer_test.cc
+++ b/cpp/fory/serialization/collection_serializer_test.cc
@@ -620,6 +620,42 @@ TEST(CollectionSerializerTest, ForwardListEmptyRoundTrip) {
   EXPECT_TRUE(deserialized.strings.empty());
 }
 
+// Test max_collection_size using objects (e.g., strings)
+TEST(CollectionSerializerTest, MaxCollectionSizeNativeGuardrail) {
+  auto fory = Fory::builder().xlang(false).max_collection_size(2).build();
+  fory.register_struct<VectorStringHolder>(200);
+
+  VectorStringHolder original;
+  original.strings = {"A", "B", "C"};
+
+  auto bytes_result = fory.serialize(original);
+  ASSERT_TRUE(bytes_result.ok());
+
+  auto deserialize_result = fory.deserialize<VectorStringHolder>(
+      bytes_result->data(), bytes_result->size());
+
+  ASSERT_FALSE(deserialize_result.ok());
+  EXPECT_TRUE(deserialize_result.error().message().find(
+                  "exceeds max_collection_size") != std::string::npos);
+}
+
+// Test max_binary_size using primitive numbers
+TEST(CollectionSerializerTest, MaxBinarySizeNativeGuardrail) {
+  auto fory = Fory::builder().xlang(false).max_binary_size(10).build();
+
+  std::vector<int32_t> large_data = {1, 2, 3, 4, 5};
+
+  auto bytes_result = fory.serialize(large_data);
+  ASSERT_TRUE(bytes_result.ok());
+
+  auto deserialize_result = fory.deserialize<std::vector<int32_t>>(
+      bytes_result->data(), bytes_result->size());
+
+  ASSERT_FALSE(deserialize_result.ok());
+  EXPECT_TRUE(deserialize_result.error().message().find(
+                  "exceeds max_binary_size") != std::string::npos);
+}
+
 } // namespace
 } // namespace serialization
 } // namespace fory
diff --git a/cpp/fory/serialization/config.h b/cpp/fory/serialization/config.h
index d471c3907..63062c7ee 100644
--- a/cpp/fory/serialization/config.h
+++ b/cpp/fory/serialization/config.h
@@ -52,6 +52,12 @@ struct Config {
   /// When enabled, avoids duplicating shared objects and handles cycles.
   bool track_ref = true;
 
+  /// Maximum allowed size for binary data in bytes.
+  uint32_t max_binary_size = 64 * 1024 * 1024; // 64MB default
+
+  /// Maximum allowed number of elements in a collection or entries in a map.
+  uint32_t max_collection_size = 1024 * 1024; // 1M elements default
+
   /// Default constructor with sensible defaults
   Config() = default;
 };
diff --git a/cpp/fory/serialization/context.h b/cpp/fory/serialization/context.h
index e080604f6..8fba3b612 100644
--- a/cpp/fory/serialization/context.h
+++ b/cpp/fory/serialization/context.h
@@ -643,6 +643,9 @@ public:
   /// reset context for reuse.
   void reset();
 
+  /// get associated configuration.
+  inline const Config &config() const { return *config_; }
+
 private:
   // Error state - accumulated during deserialization, checked at the end
   Error error_;
diff --git a/cpp/fory/serialization/fory.h b/cpp/fory/serialization/fory.h
index 1c5f19522..25cd5ec25 100644
--- a/cpp/fory/serialization/fory.h
+++ b/cpp/fory/serialization/fory.h
@@ -123,6 +123,19 @@ public:
   /// Build a thread-safe Fory instance (uses context pools).
   ThreadSafeFory build_thread_safe();
 
+  /// Set the maximum allowed size for binary data in bytes.
+  inline ForyBuilder &max_binary_size(uint32_t size) {
+    config_.max_binary_size = size;
+    return *this;
+  }
+
+  /// Set the maximum allowed number of elements in a collection or entries in 
a
+  /// map.
+  inline ForyBuilder &max_collection_size(uint32_t size) {
+    config_.max_collection_size = size;
+    return *this;
+  }
+
 private:
   Config config_;
   std::shared_ptr<TypeResolver> type_resolver_;
diff --git a/cpp/fory/serialization/map_serializer.h 
b/cpp/fory/serialization/map_serializer.h
index dd2952da9..ace9e297b 100644
--- a/cpp/fory/serialization/map_serializer.h
+++ b/cpp/fory/serialization/map_serializer.h
@@ -551,6 +551,13 @@ inline MapType read_map_data_fast(ReadContext &ctx, 
uint32_t length) {
   static_assert(!is_shared_ref_v<K> && !is_shared_ref_v<V>,
                 "Fast path is for non-shared-ref types only");
 
+  // Guardrail: Enforce max_collection_size for map reads (entry count)
+  if (FORY_PREDICT_FALSE(length > ctx.config().max_collection_size)) {
+    ctx.set_error(
+        Error::invalid_data("Map entry count exceeds max_collection_size"));
+    return MapType{};
+  }
+
   MapType result;
   MapReserver<MapType>::reserve(result, length);
 
@@ -682,6 +689,13 @@ inline MapType read_map_data_fast(ReadContext &ctx, 
uint32_t length) {
 /// Read map data for polymorphic or shared-ref maps
 template <typename K, typename V, typename MapType>
 inline MapType read_map_data_slow(ReadContext &ctx, uint32_t length) {
+  // Guardrail: Enforce max_collection_size for map reads (entry count)
+  if (FORY_PREDICT_FALSE(length > ctx.config().max_collection_size)) {
+    ctx.set_error(
+        Error::invalid_data("Map entry count exceeds max_collection_size"));
+    return MapType{};
+  }
+
   MapType result;
   MapReserver<MapType>::reserve(result, length);
 
diff --git a/cpp/fory/serialization/map_serializer_test.cc 
b/cpp/fory/serialization/map_serializer_test.cc
index bf91e939f..6f27c1729 100644
--- a/cpp/fory/serialization/map_serializer_test.cc
+++ b/cpp/fory/serialization/map_serializer_test.cc
@@ -780,6 +780,22 @@ TEST(MapSerializerTest, LargeMapWithPolymorphicValues) {
   EXPECT_EQ(deserialized[299]->name, "value_y_299");
 }
 
+TEST(MapSerializerTest, MaxMapSizeGuardrail) {
+  auto fory = Fory::builder().xlang(true).max_collection_size(2).build();
+
+  std::map<std::string, int32_t> large_map = {{"a", 1}, {"b", 2}, {"c", 3}};
+
+  auto serialize_result = fory.serialize(large_map);
+  ASSERT_TRUE(serialize_result.ok());
+
+  auto deserialize_result = fory.deserialize<std::map<std::string, int32_t>>(
+      serialize_result->data(), serialize_result->size());
+
+  ASSERT_FALSE(deserialize_result.ok());
+  EXPECT_TRUE(deserialize_result.error().message().find(
+                  "exceeds max_collection_size") != std::string::npos);
+}
+
 int main(int argc, char **argv) {
   ::testing::InitGoogleTest(&argc, argv);
   return RUN_ALL_TESTS();
diff --git a/cpp/fory/serialization/unsigned_serializer.h 
b/cpp/fory/serialization/unsigned_serializer.h
index e53ff250f..5705b5ceb 100644
--- a/cpp/fory/serialization/unsigned_serializer.h
+++ b/cpp/fory/serialization/unsigned_serializer.h
@@ -703,6 +703,12 @@ template <> struct Serializer<std::vector<uint8_t>> {
 
   static inline std::vector<uint8_t> read_data(ReadContext &ctx) {
     uint32_t length = ctx.read_var_uint32(ctx.error());
+
+    if (FORY_PREDICT_FALSE(length > ctx.config().max_binary_size)) {
+      ctx.set_error(Error::invalid_data("Binary size exceeds 
max_binary_size"));
+      return std::vector<uint8_t>();
+    }
+
     if (FORY_PREDICT_FALSE(length > ctx.buffer().remaining_size())) {
       ctx.set_error(
           Error::invalid_data("Invalid length: " + std::to_string(length)));
@@ -798,6 +804,12 @@ template <> struct Serializer<std::vector<uint16_t>> {
     if (FORY_PREDICT_FALSE(ctx.has_error())) {
       return std::vector<uint16_t>();
     }
+
+    if (FORY_PREDICT_FALSE(total_bytes > ctx.config().max_binary_size)) {
+      ctx.set_error(Error::invalid_data("Binary size exceeds 
max_binary_size"));
+      return std::vector<uint16_t>();
+    }
+
     if (total_bytes % sizeof(uint16_t) != 0) {
       ctx.set_error(Error::invalid_data("Invalid length: " +
                                         std::to_string(total_bytes)));
@@ -900,6 +912,12 @@ template <> struct Serializer<std::vector<uint32_t>> {
     if (FORY_PREDICT_FALSE(ctx.has_error())) {
       return std::vector<uint32_t>();
     }
+
+    if (FORY_PREDICT_FALSE(total_bytes > ctx.config().max_binary_size)) {
+      ctx.set_error(Error::invalid_data("Binary size exceeds 
max_binary_size"));
+      return std::vector<uint32_t>();
+    }
+
     if (total_bytes % sizeof(uint32_t) != 0) {
       ctx.set_error(Error::invalid_data("Invalid length: " +
                                         std::to_string(total_bytes)));
@@ -1002,6 +1020,12 @@ template <> struct Serializer<std::vector<uint64_t>> {
     if (FORY_PREDICT_FALSE(ctx.has_error())) {
       return std::vector<uint64_t>();
     }
+
+    if (FORY_PREDICT_FALSE(total_bytes > ctx.config().max_binary_size)) {
+      ctx.set_error(Error::invalid_data("Binary size exceeds 
max_binary_size"));
+      return std::vector<uint64_t>();
+    }
+
     if (total_bytes % sizeof(uint64_t) != 0) {
       ctx.set_error(Error::invalid_data("Invalid length: " +
                                         std::to_string(total_bytes)));
diff --git a/cpp/fory/serialization/unsigned_serializer_test.cc 
b/cpp/fory/serialization/unsigned_serializer_test.cc
index 30196c1de..75173515e 100644
--- a/cpp/fory/serialization/unsigned_serializer_test.cc
+++ b/cpp/fory/serialization/unsigned_serializer_test.cc
@@ -271,6 +271,24 @@ TEST(UnsignedSerializerTest, 
UnsignedArrayTypeIdsAreDistinct) {
             static_cast<uint32_t>(TypeId::BINARY));
 }
 
+TEST(UnsignedSerializerTest, MaxBinarySizeNativeGuardrail) {
+  // Set limit to 10 bytes
+  auto fory = Fory::builder().xlang(false).max_binary_size(10).build();
+
+  // 10 elements of uint32_t = 40 bytes > 10 byte limit
+  std::vector<uint32_t> large_data(10, 42);
+
+  auto bytes_result = fory.serialize(large_data);
+  ASSERT_TRUE(bytes_result.ok());
+
+  auto deserialize_result = fory.deserialize<std::vector<uint32_t>>(
+      bytes_result->data(), bytes_result->size());
+
+  ASSERT_FALSE(deserialize_result.ok());
+  EXPECT_TRUE(deserialize_result.error().message().find(
+                  "exceeds max_binary_size") != std::string::npos);
+}
+
 } // namespace test
 } // namespace serialization
 } // namespace fory


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to