This is an automated email from the ASF dual-hosted git repository.
chaokunyang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/fory.git
The following commit(s) were added to refs/heads/main by this push:
new 87acc125f feat(c++): add configurable deserialization size guardrails (#3455)
87acc125f is described below
commit 87acc125f36890b4aee1795f34b50c45d3fa9858
Author: devadhe sb <[email protected]>
AuthorDate: Tue Mar 10 08:46:42 2026 +0530
feat(c++): add configurable deserialization size guardrails (#3455)
## Why?
We currently don't have any size limits for incoming payloads in the C++
implementation. This is a security risk because a malicious or malformed
payload can claim to have a massive collection or binary length, forcing
the system to pre-allocate gigabytes of memory (via `.reserve()` or
constructors) before actually reading the data. This makes the system
vulnerable to simple Out-of-Memory (OOM) Denial-of-Service attacks.
## What does this PR do?
This PR adds two essential security guardrails to the deserialization
path: `max_binary_size` and `max_collection_size`.
**Changes included:**
* **Config & API**: Added the two new limits to `serialization::Config`
and updated `ForyBuilder` so users can easily set these at runtime.
Defaults are 64 MiB (64 * 1024 * 1024 bytes) for binary data and 1M
(1024 * 1024) entries for collections.
* **Security Enforcement**:
* Integrated checks into all sensitive pre-allocation paths, including
`std::vector`, `std::list`, `std::deque`, `std::forward_list`, `std::set`,
and `std::unordered_set`.
* Added entry-count validation for Maps (both fast and slow paths).
* Specifically handled arithmetic vectors by converting byte-lengths to
element counts to ensure `max_collection_size` is respected.
* **Context Access**: Exposed a public `config()` accessor in
`ReadContext` and `WriteContext` so internal serializers can reach these
settings.
* **Tests**: Added new test cases in `collection_serializer_test.cc`,
`map_serializer_test.cc`, and `unsigned_serializer_test.cc` to verify that
deserialization fails immediately with a descriptive error when limits are
exceeded.
## Related issues
Fixes #3408
## Does this PR introduce any user-facing change?
Yes, it adds two new methods (`max_binary_size` and
`max_collection_size`) to the `ForyBuilder`.
* [x] Does this PR introduce any public API change?
* [ ] Does this PR introduce any binary protocol compatibility change?
## Benchmark
The performance impact is negligible. The checks are simple integer
comparisons performed once per collection/binary read, occurring right
before the expensive allocation phase. All 30 existing C++ test targets
pass with no measurable change in execution time.
---
cpp/fory/serialization/collection_serializer.h | 105 ++++++++++++++++++++-
.../serialization/collection_serializer_test.cc | 36 +++++++
cpp/fory/serialization/config.h | 6 ++
cpp/fory/serialization/context.h | 3 +
cpp/fory/serialization/fory.h | 13 +++
cpp/fory/serialization/map_serializer.h | 14 +++
cpp/fory/serialization/map_serializer_test.cc | 16 ++++
cpp/fory/serialization/unsigned_serializer.h | 24 +++++
cpp/fory/serialization/unsigned_serializer_test.cc | 18 ++++
9 files changed, 234 insertions(+), 1 deletion(-)
diff --git a/cpp/fory/serialization/collection_serializer.h b/cpp/fory/serialization/collection_serializer.h
index 3768275d1..419352685 100644
--- a/cpp/fory/serialization/collection_serializer.h
+++ b/cpp/fory/serialization/collection_serializer.h
@@ -392,6 +392,13 @@ inline void collection_insert(Container &result, T &&elem) {
/// Read collection data for polymorphic or shared-ref elements.
template <typename T, typename Container>
inline Container read_collection_data_slow(ReadContext &ctx, uint32_t length) {
+ // Guardrail: Enforce max_collection_size for collection reads
+ if (FORY_PREDICT_FALSE(length > ctx.config().max_collection_size)) {
+ ctx.set_error(
+ Error::invalid_data("Collection length exceeds max_collection_size"));
+ return Container{};
+ }
+
Container result;
if constexpr (has_reserve_v<Container>) {
result.reserve(length);
@@ -611,15 +618,22 @@ struct Serializer<
if (FORY_PREDICT_FALSE(ctx.has_error())) {
return std::vector<T, Alloc>();
}
+ // Guardrail: Enforce max_binary_size for binary byte-length reads
+ if (FORY_PREDICT_FALSE(total_bytes_u32 > ctx.config().max_binary_size)) {
+ ctx.set_error(Error::invalid_data("Binary size exceeds max_binary_size"));
+ return std::vector<T, Alloc>();
+ }
if (sizeof(T) == 0) {
return std::vector<T, Alloc>();
}
+
+ size_t elem_count = total_bytes_u32 / sizeof(T);
+
if (total_bytes_u32 % sizeof(T) != 0) {
ctx.set_error(Error::invalid_data(
"Vector byte size not aligned with element size"));
return std::vector<T, Alloc>();
}
- size_t elem_count = total_bytes_u32 / sizeof(T);
std::vector<T, Alloc> result(elem_count);
if (total_bytes_u32 > 0) {
ctx.read_bytes(result.data(), static_cast<uint32_t>(total_bytes_u32),
@@ -677,6 +691,13 @@ struct Serializer<
if (FORY_PREDICT_FALSE(ctx.has_error())) {
return std::vector<T, Alloc>();
}
+
+ if (FORY_PREDICT_FALSE(length > ctx.config().max_collection_size)) {
+ ctx.set_error(
+ Error::invalid_data("Collection length exceeds max_collection_size"));
+ return std::vector<T, Alloc>();
+ }
+
// Per xlang spec: header and type_info are omitted when length is 0
if (length == 0) {
return std::vector<T, Alloc>();
@@ -808,6 +829,13 @@ struct Serializer<
if (FORY_PREDICT_FALSE(ctx.has_error())) {
return std::vector<T, Alloc>();
}
+
+ if (FORY_PREDICT_FALSE(size > ctx.config().max_collection_size)) {
+ ctx.set_error(
+ Error::invalid_data("Collection length exceeds max_collection_size"));
+ return std::vector<T, Alloc>();
+ }
+
std::vector<T, Alloc> result;
result.reserve(size);
for (uint32_t i = 0; i < size; ++i) {
@@ -897,6 +925,12 @@ template <typename Alloc> struct Serializer<std::vector<bool, Alloc>> {
if (FORY_PREDICT_FALSE(ctx.has_error())) {
return std::vector<bool, Alloc>();
}
+
+ if (FORY_PREDICT_FALSE(size > ctx.config().max_binary_size)) {
+ ctx.set_error(Error::invalid_data("Binary size exceeds max_binary_size"));
+ return std::vector<bool, Alloc>();
+ }
+
std::vector<bool, Alloc> result(size);
// Fast path: bulk read all bytes at once if we have enough buffer
Buffer &buffer = ctx.buffer();
@@ -971,6 +1005,13 @@ template <typename T, typename Alloc> struct Serializer<std::list<T, Alloc>> {
if (FORY_PREDICT_FALSE(ctx.has_error())) {
return std::list<T, Alloc>();
}
+
+ if (FORY_PREDICT_FALSE(length > ctx.config().max_collection_size)) {
+ ctx.set_error(
+ Error::invalid_data("Collection length exceeds max_collection_size"));
+ return std::list<T, Alloc>();
+ }
+
// Per xlang spec: header and type_info are omitted when length is 0
if (length == 0) {
return std::list<T, Alloc>();
@@ -1101,6 +1142,13 @@ template <typename T, typename Alloc> struct Serializer<std::list<T, Alloc>> {
if (FORY_PREDICT_FALSE(ctx.has_error())) {
return std::list<T, Alloc>();
}
+
+ if (FORY_PREDICT_FALSE(size > ctx.config().max_collection_size)) {
+ ctx.set_error(
+ Error::invalid_data("Collection length exceeds max_collection_size"));
+ return std::list<T, Alloc>();
+ }
+
std::list<T, Alloc> result;
for (uint32_t i = 0; i < size; ++i) {
if (FORY_PREDICT_FALSE(ctx.has_error())) {
@@ -1161,6 +1209,13 @@ template <typename T, typename Alloc> struct Serializer<std::deque<T, Alloc>> {
if (FORY_PREDICT_FALSE(ctx.has_error())) {
return std::deque<T, Alloc>();
}
+
+ if (FORY_PREDICT_FALSE(length > ctx.config().max_collection_size)) {
+ ctx.set_error(
+ Error::invalid_data("Collection length exceeds max_collection_size"));
+ return std::deque<T, Alloc>();
+ }
+
// Per xlang spec: header and type_info are omitted when length is 0
if (length == 0) {
return std::deque<T, Alloc>();
@@ -1291,6 +1346,13 @@ template <typename T, typename Alloc> struct Serializer<std::deque<T, Alloc>> {
if (FORY_PREDICT_FALSE(ctx.has_error())) {
return std::deque<T, Alloc>();
}
+
+ if (FORY_PREDICT_FALSE(size > ctx.config().max_collection_size)) {
+ ctx.set_error(
+ Error::invalid_data("Collection length exceeds max_collection_size"));
+ return std::deque<T, Alloc>();
+ }
+
std::deque<T, Alloc> result;
for (uint32_t i = 0; i < size; ++i) {
if (FORY_PREDICT_FALSE(ctx.has_error())) {
@@ -1352,6 +1414,13 @@ struct Serializer<std::forward_list<T, Alloc>> {
if (FORY_PREDICT_FALSE(ctx.has_error())) {
return std::forward_list<T, Alloc>();
}
+
+ if (FORY_PREDICT_FALSE(length > ctx.config().max_collection_size)) {
+ ctx.set_error(
+ Error::invalid_data("Collection length exceeds max_collection_size"));
+ return std::forward_list<T, Alloc>();
+ }
+
// Per xlang spec: header and type_info are omitted when length is 0
if (length == 0) {
return std::forward_list<T, Alloc>();
@@ -1716,6 +1785,13 @@ struct Serializer<std::forward_list<T, Alloc>> {
if (FORY_PREDICT_FALSE(ctx.has_error())) {
return std::forward_list<T, Alloc>();
}
+
+ if (FORY_PREDICT_FALSE(size > ctx.config().max_collection_size)) {
+ ctx.set_error(
+ Error::invalid_data("Collection length exceeds max_collection_size"));
+ return std::forward_list<T, Alloc>();
+ }
+
std::vector<T> temp;
temp.reserve(size);
for (uint32_t i = 0; i < size; ++i) {
@@ -1814,6 +1890,13 @@ struct Serializer<std::set<T, Args...>> {
if (FORY_PREDICT_FALSE(ctx.has_error())) {
return std::set<T, Args...>();
}
+
+ if (FORY_PREDICT_FALSE(size > ctx.config().max_collection_size)) {
+ ctx.set_error(
+ Error::invalid_data("Collection length exceeds max_collection_size"));
+ return std::set<T, Args...>();
+ }
+
// Per xlang spec: header and type_info are omitted when length is 0
if (size == 0) {
return std::set<T, Args...>();
@@ -1894,6 +1977,13 @@ struct Serializer<std::set<T, Args...>> {
if (FORY_PREDICT_FALSE(ctx.has_error())) {
return std::set<T, Args...>();
}
+
+ if (FORY_PREDICT_FALSE(size > ctx.config().max_collection_size)) {
+ ctx.set_error(
+ Error::invalid_data("Collection length exceeds max_collection_size"));
+ return std::set<T, Args...>();
+ }
+
std::set<T, Args...> result;
for (uint32_t i = 0; i < size; ++i) {
if (FORY_PREDICT_FALSE(ctx.has_error())) {
@@ -1988,6 +2078,12 @@ struct Serializer<std::unordered_set<T, Args...>> {
return std::unordered_set<T, Args...>();
}
+ if (FORY_PREDICT_FALSE(size > ctx.config().max_collection_size)) {
+ ctx.set_error(
+ Error::invalid_data("Collection length exceeds max_collection_size"));
+ return std::unordered_set<T, Args...>();
+ }
+
// Per xlang spec: header and type_info are omitted when length is 0
if (size == 0) {
return std::unordered_set<T, Args...>();
@@ -2070,6 +2166,13 @@ struct Serializer<std::unordered_set<T, Args...>> {
if (FORY_PREDICT_FALSE(ctx.has_error())) {
return std::unordered_set<T, Args...>();
}
+
+ if (FORY_PREDICT_FALSE(size > ctx.config().max_collection_size)) {
+ ctx.set_error(
+ Error::invalid_data("Collection length exceeds max_collection_size"));
+ return std::unordered_set<T, Args...>();
+ }
+
std::unordered_set<T, Args...> result;
result.reserve(size);
for (uint32_t i = 0; i < size; ++i) {
diff --git a/cpp/fory/serialization/collection_serializer_test.cc b/cpp/fory/serialization/collection_serializer_test.cc
index 0394ff256..e50afa290 100644
--- a/cpp/fory/serialization/collection_serializer_test.cc
+++ b/cpp/fory/serialization/collection_serializer_test.cc
@@ -620,6 +620,42 @@ TEST(CollectionSerializerTest, ForwardListEmptyRoundTrip) {
EXPECT_TRUE(deserialized.strings.empty());
}
+// Test max_collection_size using objects (e.g., strings)
+TEST(CollectionSerializerTest, MaxCollectionSizeNativeGuardrail) {
+ auto fory = Fory::builder().xlang(false).max_collection_size(2).build();
+ fory.register_struct<VectorStringHolder>(200);
+
+ VectorStringHolder original;
+ original.strings = {"A", "B", "C"};
+
+ auto bytes_result = fory.serialize(original);
+ ASSERT_TRUE(bytes_result.ok());
+
+ auto deserialize_result = fory.deserialize<VectorStringHolder>(
+ bytes_result->data(), bytes_result->size());
+
+ ASSERT_FALSE(deserialize_result.ok());
+ EXPECT_TRUE(deserialize_result.error().message().find(
+ "exceeds max_collection_size") != std::string::npos);
+}
+
+// Test max_binary_size using primitive numbers
+TEST(CollectionSerializerTest, MaxBinarySizeNativeGuardrail) {
+ auto fory = Fory::builder().xlang(false).max_binary_size(10).build();
+
+ std::vector<int32_t> large_data = {1, 2, 3, 4, 5};
+
+ auto bytes_result = fory.serialize(large_data);
+ ASSERT_TRUE(bytes_result.ok());
+
+ auto deserialize_result = fory.deserialize<std::vector<int32_t>>(
+ bytes_result->data(), bytes_result->size());
+
+ ASSERT_FALSE(deserialize_result.ok());
+ EXPECT_TRUE(deserialize_result.error().message().find(
+ "exceeds max_binary_size") != std::string::npos);
+}
+
} // namespace
} // namespace serialization
} // namespace fory
diff --git a/cpp/fory/serialization/config.h b/cpp/fory/serialization/config.h
index d471c3907..63062c7ee 100644
--- a/cpp/fory/serialization/config.h
+++ b/cpp/fory/serialization/config.h
@@ -52,6 +52,12 @@ struct Config {
/// When enabled, avoids duplicating shared objects and handles cycles.
bool track_ref = true;
+ /// Maximum allowed size for binary data in bytes.
+ uint32_t max_binary_size = 64 * 1024 * 1024; // 64MB default
+
+ /// Maximum allowed number of elements in a collection or entries in a map.
+ uint32_t max_collection_size = 1024 * 1024; // 1M elements default
+
/// Default constructor with sensible defaults
Config() = default;
};
diff --git a/cpp/fory/serialization/context.h b/cpp/fory/serialization/context.h
index e080604f6..8fba3b612 100644
--- a/cpp/fory/serialization/context.h
+++ b/cpp/fory/serialization/context.h
@@ -643,6 +643,9 @@ public:
/// reset context for reuse.
void reset();
+ /// get associated configuration.
+ inline const Config &config() const { return *config_; }
+
private:
// Error state - accumulated during deserialization, checked at the end
Error error_;
diff --git a/cpp/fory/serialization/fory.h b/cpp/fory/serialization/fory.h
index 1c5f19522..25cd5ec25 100644
--- a/cpp/fory/serialization/fory.h
+++ b/cpp/fory/serialization/fory.h
@@ -123,6 +123,19 @@ public:
/// Build a thread-safe Fory instance (uses context pools).
ThreadSafeFory build_thread_safe();
+ /// Set the maximum allowed size for binary data in bytes.
+ inline ForyBuilder &max_binary_size(uint32_t size) {
+ config_.max_binary_size = size;
+ return *this;
+ }
+
+ /// Set the maximum allowed number of elements in a collection or entries in a
+ /// map.
+ inline ForyBuilder &max_collection_size(uint32_t size) {
+ config_.max_collection_size = size;
+ return *this;
+ }
+
private:
Config config_;
std::shared_ptr<TypeResolver> type_resolver_;
diff --git a/cpp/fory/serialization/map_serializer.h b/cpp/fory/serialization/map_serializer.h
index dd2952da9..ace9e297b 100644
--- a/cpp/fory/serialization/map_serializer.h
+++ b/cpp/fory/serialization/map_serializer.h
@@ -551,6 +551,13 @@ inline MapType read_map_data_fast(ReadContext &ctx, uint32_t length) {
static_assert(!is_shared_ref_v<K> && !is_shared_ref_v<V>,
"Fast path is for non-shared-ref types only");
+ // Guardrail: Enforce max_collection_size for map reads (entry count)
+ if (FORY_PREDICT_FALSE(length > ctx.config().max_collection_size)) {
+ ctx.set_error(
+ Error::invalid_data("Map entry count exceeds max_collection_size"));
+ return MapType{};
+ }
+
MapType result;
MapReserver<MapType>::reserve(result, length);
@@ -682,6 +689,13 @@ inline MapType read_map_data_fast(ReadContext &ctx, uint32_t length) {
/// Read map data for polymorphic or shared-ref maps
template <typename K, typename V, typename MapType>
inline MapType read_map_data_slow(ReadContext &ctx, uint32_t length) {
+ // Guardrail: Enforce max_collection_size for map reads (entry count)
+ if (FORY_PREDICT_FALSE(length > ctx.config().max_collection_size)) {
+ ctx.set_error(
+ Error::invalid_data("Map entry count exceeds max_collection_size"));
+ return MapType{};
+ }
+
MapType result;
MapReserver<MapType>::reserve(result, length);
diff --git a/cpp/fory/serialization/map_serializer_test.cc b/cpp/fory/serialization/map_serializer_test.cc
index bf91e939f..6f27c1729 100644
--- a/cpp/fory/serialization/map_serializer_test.cc
+++ b/cpp/fory/serialization/map_serializer_test.cc
@@ -780,6 +780,22 @@ TEST(MapSerializerTest, LargeMapWithPolymorphicValues) {
EXPECT_EQ(deserialized[299]->name, "value_y_299");
}
+TEST(MapSerializerTest, MaxMapSizeGuardrail) {
+ auto fory = Fory::builder().xlang(true).max_collection_size(2).build();
+
+ std::map<std::string, int32_t> large_map = {{"a", 1}, {"b", 2}, {"c", 3}};
+
+ auto serialize_result = fory.serialize(large_map);
+ ASSERT_TRUE(serialize_result.ok());
+
+ auto deserialize_result = fory.deserialize<std::map<std::string, int32_t>>(
+ serialize_result->data(), serialize_result->size());
+
+ ASSERT_FALSE(deserialize_result.ok());
+ EXPECT_TRUE(deserialize_result.error().message().find(
+ "exceeds max_collection_size") != std::string::npos);
+}
+
int main(int argc, char **argv) {
::testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
diff --git a/cpp/fory/serialization/unsigned_serializer.h b/cpp/fory/serialization/unsigned_serializer.h
index e53ff250f..5705b5ceb 100644
--- a/cpp/fory/serialization/unsigned_serializer.h
+++ b/cpp/fory/serialization/unsigned_serializer.h
@@ -703,6 +703,12 @@ template <> struct Serializer<std::vector<uint8_t>> {
static inline std::vector<uint8_t> read_data(ReadContext &ctx) {
uint32_t length = ctx.read_var_uint32(ctx.error());
+
+ if (FORY_PREDICT_FALSE(length > ctx.config().max_binary_size)) {
+ ctx.set_error(Error::invalid_data("Binary size exceeds max_binary_size"));
+ return std::vector<uint8_t>();
+ }
+
if (FORY_PREDICT_FALSE(length > ctx.buffer().remaining_size())) {
ctx.set_error(
Error::invalid_data("Invalid length: " + std::to_string(length)));
@@ -798,6 +804,12 @@ template <> struct Serializer<std::vector<uint16_t>> {
if (FORY_PREDICT_FALSE(ctx.has_error())) {
return std::vector<uint16_t>();
}
+
+ if (FORY_PREDICT_FALSE(total_bytes > ctx.config().max_binary_size)) {
+ ctx.set_error(Error::invalid_data("Binary size exceeds max_binary_size"));
+ return std::vector<uint16_t>();
+ }
+
if (total_bytes % sizeof(uint16_t) != 0) {
ctx.set_error(Error::invalid_data("Invalid length: " +
std::to_string(total_bytes)));
@@ -900,6 +912,12 @@ template <> struct Serializer<std::vector<uint32_t>> {
if (FORY_PREDICT_FALSE(ctx.has_error())) {
return std::vector<uint32_t>();
}
+
+ if (FORY_PREDICT_FALSE(total_bytes > ctx.config().max_binary_size)) {
+ ctx.set_error(Error::invalid_data("Binary size exceeds max_binary_size"));
+ return std::vector<uint32_t>();
+ }
+
if (total_bytes % sizeof(uint32_t) != 0) {
ctx.set_error(Error::invalid_data("Invalid length: " +
std::to_string(total_bytes)));
@@ -1002,6 +1020,12 @@ template <> struct Serializer<std::vector<uint64_t>> {
if (FORY_PREDICT_FALSE(ctx.has_error())) {
return std::vector<uint64_t>();
}
+
+ if (FORY_PREDICT_FALSE(total_bytes > ctx.config().max_binary_size)) {
+ ctx.set_error(Error::invalid_data("Binary size exceeds max_binary_size"));
+ return std::vector<uint64_t>();
+ }
+
if (total_bytes % sizeof(uint64_t) != 0) {
ctx.set_error(Error::invalid_data("Invalid length: " +
std::to_string(total_bytes)));
diff --git a/cpp/fory/serialization/unsigned_serializer_test.cc b/cpp/fory/serialization/unsigned_serializer_test.cc
index 30196c1de..75173515e 100644
--- a/cpp/fory/serialization/unsigned_serializer_test.cc
+++ b/cpp/fory/serialization/unsigned_serializer_test.cc
@@ -271,6 +271,24 @@ TEST(UnsignedSerializerTest, UnsignedArrayTypeIdsAreDistinct) {
static_cast<uint32_t>(TypeId::BINARY));
}
+TEST(UnsignedSerializerTest, MaxBinarySizeNativeGuardrail) {
+ // Set limit to 10 bytes
+ auto fory = Fory::builder().xlang(false).max_binary_size(10).build();
+
+ // 10 elements of uint32_t = 40 bytes > 10 byte limit
+ std::vector<uint32_t> large_data(10, 42);
+
+ auto bytes_result = fory.serialize(large_data);
+ ASSERT_TRUE(bytes_result.ok());
+
+ auto deserialize_result = fory.deserialize<std::vector<uint32_t>>(
+ bytes_result->data(), bytes_result->size());
+
+ ASSERT_FALSE(deserialize_result.ok());
+ EXPECT_TRUE(deserialize_result.error().message().find(
+ "exceeds max_binary_size") != std::string::npos);
+}
+
} // namespace test
} // namespace serialization
} // namespace fory
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]