This is an automated email from the ASF dual-hosted git repository.
tlopex pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git
The following commit(s) were added to refs/heads/main by this push:
new 9acbf4ae6b [BugFix][Relax] Add structural_equal verification to
subroutine cache lookup (#18962)
9acbf4ae6b is described below
commit 9acbf4ae6b075d441b1ed7f9bd2f37a64521aec2
Author: 3em0 <[email protected]>
AuthorDate: Wed Apr 8 11:18:05 2026 +0800
[BugFix][Relax] Add structural_equal verification to subroutine cache
lookup (#18962)
## Summary
- `SubroutineMixin._get_subroutine()` used `structural_hash` as the sole
cache key without `structural_equal` verification. If two different
`arg_sinfo` values produced the same 64-bit hash (collision), the cache
would return a previously compiled function with mismatched parameter
shapes, leading to silently incorrect compiled output.
- Changed the cache to store a list of `(arg_sinfo, result)` pairs per
hash bucket and verify with `structural_equal` on lookup, consistent
with the pattern in `block_builder.cc`.
- Added a security advisory document and regression test.
## Root Cause
The subroutine cache (`cls._gvar`) was keyed by
`(structural_hash(arg_sinfo), is_dataflow)`. A hash match was treated as
proof of structural equality, skipping the necessary `structural_equal`
check. This is a hash-only lookup anti-pattern — hash determines the
bucket, but equality must confirm the match.
For comparison, `block_builder.cc` correctly uses `StructuralHash` +
`StructuralEqual` together as the hash and equality functions for
`std::unordered_map`.
## Test plan
- [ ] Existing test `test_linear` passes (no regression)
- [ ] New test `test_different_shapes_produce_distinct_subroutines`
passes — verifies that the same Module class with different input shapes
generates distinct subroutines
🤖 Generated with [Claude Code](https://claude.com/claude-code)
---------
Co-authored-by: [email protected] <[email protected]>
Co-authored-by: Claude Opus 4.6 (1M context) <[email protected]>
Co-authored-by: gemini-code-assist[bot]
<176961590+gemini-code-assist[bot]@users.noreply.github.com>
---
docs/reference/security.rst | 29 ++++++++++++
python/tvm/relax/frontend/nn/subroutine.py | 14 ++++--
tests/python/relax/test_frontend_nn_subroutines.py | 53 ++++++++++++++++++++++
3 files changed, 91 insertions(+), 5 deletions(-)
diff --git a/docs/reference/security.rst b/docs/reference/security.rst
index 8380b6c15a..de3ebf464d 100644
--- a/docs/reference/security.rst
+++ b/docs/reference/security.rst
@@ -49,3 +49,32 @@ we expect users to put in only trusted URLs.
RPC data exchange between the tracker, server and client are in plain-text.
It is recommended to use them under trusted networking environment or
encrypted channels.
+
+
+Security Advisories
+-------------------
+
+Subroutine Cache Hash Collision
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+``SubroutineMixin._get_subroutine()`` in
``python/tvm/relax/frontend/nn/subroutine.py``
+used ``ir.structural_hash`` as the sole cache lookup key without a subsequent
+``structural_equal`` verification. If two different ``arg_sinfo`` values
produced the
+same 64-bit hash, the cache would return a previously compiled function with
+mismatched parameter shapes, leading to silently incorrect compiled output.
+
+**Severity**: Low. The ``structural_hash`` function returns a 64-bit integer.
+A natural hash collision requires approximately 2^32 distinct inputs (birthday
bound),
+making accidental collision extremely unlikely in normal compilation workflows.
+The issue is primarily a correctness defect rather than a practically
exploitable
+security vulnerability.
+
+**Root Cause**: The subroutine cache (``cls._gvar``) was keyed by
+``(structural_hash(arg_sinfo, map_free_vars=True), is_dataflow)``.
+A hash match was treated as proof of structural equality, skipping the
necessary
+``structural_equal`` check.
+
+**Fix**: The cache now stores a list of ``(arg_sinfo, result)`` pairs per hash
bucket.
+On lookup, each candidate is verified with ``structural_equal`` before
returning.
+This follows the standard hash-table pattern: hash for bucket selection,
equality
+for final verification.
diff --git a/python/tvm/relax/frontend/nn/subroutine.py
b/python/tvm/relax/frontend/nn/subroutine.py
index c62491be9f..0716f19a92 100644
--- a/python/tvm/relax/frontend/nn/subroutine.py
+++ b/python/tvm/relax/frontend/nn/subroutine.py
@@ -25,6 +25,7 @@ import re
import typing
from tvm import ir, relax
+from tvm.ir import structural_equal
from tvm.relax.frontend import nn
@@ -141,10 +142,11 @@ class SubroutineMixin:
arg_sinfo = _get_struct_info([*func_args.values(), *model_params])
is_dataflow = block_builder.current_block_is_dataflow()
- lookup_key = (ir.structural_hash(arg_sinfo, map_free_vars=True),
is_dataflow)
+ lookup_key = (old_forward, ir.structural_hash(arg_sinfo,
map_free_vars=True), is_dataflow)
- if lookup_key in cls._gvar:
- return cls._gvar[lookup_key]
+ for cached_sinfo, cached_result in cls._gvar.get(lookup_key, []):
+ if structural_equal(cached_sinfo, arg_sinfo, map_free_vars=True):
+ return cached_result
func_name = _camel_to_snake(cls.__name__)
func_params = [relax.Var(name, sinfo) for name, sinfo in
zip(func_args, arg_sinfo.fields)]
@@ -175,5 +177,7 @@ class SubroutineMixin:
mod = block_builder.get()
mod.update_func(gvar, relax.utils.copy_with_new_vars(mod[gvar]))
- cls._gvar[lookup_key] = (gvar, is_nn_tensor_output)
- return cls._gvar[lookup_key]
+ result = (gvar, is_nn_tensor_output)
+ bucket = cls._gvar.setdefault(lookup_key, [])
+ bucket.append((arg_sinfo, result))
+ return result
diff --git a/tests/python/relax/test_frontend_nn_subroutines.py
b/tests/python/relax/test_frontend_nn_subroutines.py
index 9ea44781b8..a06fa05c77 100644
--- a/tests/python/relax/test_frontend_nn_subroutines.py
+++ b/tests/python/relax/test_frontend_nn_subroutines.py
@@ -97,5 +97,58 @@ def test_linear():
assert_structural_equal(Expected, tvm_mod, True)
+def test_different_shapes_produce_distinct_subroutines():
+ """Regression test: same Module class with different input shapes
+ must generate distinct subroutines, not reuse a cached one."""
+
+ class Linear(nn.Module):
+ define_subroutine = True
+
+ def __init__(self, in_features, out_features):
+ self.weights = nn.Parameter((in_features, out_features),
dtype="float32")
+
+ def forward(self, input: relax.Expr) -> relax.Var:
+ return nn.op.matmul(input, self.weights)
+
+ class Model(nn.Module):
+ def __init__(self):
+ self.linear_a = Linear(32, 16)
+ self.linear_b = Linear(64, 16)
+
+ def forward(self, x: relax.Expr, y: relax.Expr) -> relax.Var:
+ a = self.linear_a(x)
+ b = self.linear_b(y)
+ return nn.op.add(a, b)
+
+ mod = Model()
+ batch_size = tvm.tir.Var("batch_size", "int64")
+ tvm_mod, _ = mod.export_tvm(
+ spec={
+ "forward": {
+ "x": nn.spec.Tensor((batch_size, 32), "float32"),
+ "y": nn.spec.Tensor((batch_size, 64), "float32"),
+ }
+ },
+ debug=True,
+ )
+
+ # Collect all private functions (subroutines) in the module
+ subroutine_funcs = [
+ func
+ for gvar, func in tvm_mod.functions.items()
+ if isinstance(func, relax.Function)
+ and gvar.name_hint not in (
+ "forward",
+ "_initialize_effect",
+ )
+ ]
+
+ # There must be two distinct Linear subroutines (one for in_features=32,
+ # one for in_features=64), not a single cached one reused for both.
+ assert len(subroutine_funcs) == 2, (
+ f"Expected 2 distinct subroutines for different input shapes, got
{len(subroutine_funcs)}"
+ )
+
+
if __name__ == "__main__":
tvm.testing.main()